1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "diagnostic-core.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "gimple.h"
51 #include "dwarf2.h"
52 #include "df.h"
53 #include "tm-constrs.h"
54 #include "params.h"
55 #include "cselib.h"
56 #include "debug.h"
57 #include "dwarf2out.h"
58 #include "sched-int.h"
59 static rtx legitimize_dllimport_symbol (rtx, bool);
60
61 #ifndef CHECK_STACK_LIMIT
62 #define CHECK_STACK_LIMIT (-1)
63 #endif
64
65 /* Return index of given mode in mult and division cost tables. */
66 #define MODE_INDEX(mode) \
67 ((mode) == QImode ? 0 \
68 : (mode) == HImode ? 1 \
69 : (mode) == SImode ? 2 \
70 : (mode) == DImode ? 3 \
71 : 4)
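/* Illustrative only: a sketch of how this index is consumed by the rtx cost
   code later in this file (the exact call sites may differ slightly):

     cost of starting a multiply in MODE:  ix86_cost->mult_init[MODE_INDEX (mode)]
     cost of a divide/mod in MODE:         ix86_cost->divide[MODE_INDEX (mode)]

   Index 4 ("other") catches any mode not listed above, e.g. TImode.  */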
72
73 /* Processor costs (relative to an add) */
74 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
75 #define COSTS_N_BYTES(N) ((N) * 2)
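/* Worked example, taking the assumption above at face value:
   COSTS_N_INSNS (1) == 4 and COSTS_N_BYTES (2) == 4, so a two-byte add is
   rated the same as one instruction on the normal scale.  Filling the size
   tables below with COSTS_N_BYTES values therefore makes "cost" track
   encoded length rather than latency when tuning for size.  */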
76
77 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
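/* How to read the memcpy/memset strategy initializers used throughout the
   cost tables below (assuming the stringop_algs layout declared in i386.h):
   the first member is the algorithm used when the block size is unknown at
   compile time; the following {max, alg} pairs select ALG for known sizes of
   at most MAX bytes, with max == -1 terminating the list and covering all
   larger sizes.  For example,

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   means: unknown size -> libcall, size <= 256 -> rep movsl, anything larger
   -> libcall.  Each table carries a pair of such descriptors, apparently one
   for 32-bit and one for 64-bit code; DUMMY_STRINGOP_ALGS above fills the
   member a given tuning never exercises.  */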
78
79 const
80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
98 0, /* "large" insn */
99 2, /* MOVE_RATIO */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
125 2, /* Branch cost */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
147 };
148
149 /* Processor costs (relative to an add) */
150 static const
151 struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
170 3, /* MOVE_RATIO */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
196 1, /* Branch cost */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
218 };
219
220 static const
221 struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
240 3, /* MOVE_RATIO */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
268 1, /* Branch cost */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
290 };
291
292 static const
293 struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
312 6, /* MOVE_RATIO */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
338 2, /* Branch cost */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
360 };
361
362 static const
363 struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
382 6, /* MOVE_RATIO */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
408 2, /* Branch cost */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
415 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
416    (we ensure the alignment).  For small blocks an inline loop is still a
417    noticeable win; for bigger blocks either rep movsl or rep movsb is the
418    way to go.  Rep movsb apparently has a more expensive startup time in the CPU,
419 but after 4K the difference is down in the noise. */
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
437 };
438
439 static const
440 struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
459 4, /* MOVE_RATIO */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
470
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
486 1, /* Branch cost */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
508 };
509
510 static const
511 struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
530 4, /* MOVE_RATIO */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
559 1, /* Branch cost */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
581 };
582
583 static const
584 struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
603 9, /* MOVE_RATIO */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
629 5, /* Branch cost */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
636 /* For some reason, Athlon deals better with REP prefix (relative to loops)
637 compared to K8. Alignment becomes important after 8 bytes for memcpy and
638 128 bytes for memset. */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
654 };
655
656 static const
657 struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
676 9, /* MOVE_RATIO */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
701 /* New AMD processors never drop prefetches; if they cannot be performed
702      immediately, they are queued.  We set the number of simultaneous prefetches
703      to a large constant to reflect this (it probably is not a good idea not
704      to limit the number of prefetches at all, as their execution also takes some
705 time). */
706 100, /* number of parallel prefetches */
707 3, /* Branch cost */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
714   /* K8 has optimized REP instructions for medium-sized blocks, but for very
715      small blocks it is better to use a loop.  For large blocks, libcall can
716      do nontemporal accesses and beat an inline copy considerably.  */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
733 };
734
735 struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
754 9, /* MOVE_RATIO */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
776 /* On K8:
777 MOVD reg64, xmmreg Double FSTORE 4
778 MOVD reg32, xmmreg Double FSTORE 4
779 On AMDFAM10:
780 MOVD reg64, xmmreg Double FADD 3
781 1/1 1/1
782 MOVD reg32, xmmreg Double FADD 3
783 1/1 1/1 */
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
787 /* New AMD processors never drop prefetches; if they cannot be performed
788      immediately, they are queued.  We set the number of simultaneous prefetches
789      to a large constant to reflect this (it probably is not a good idea not
790      to limit the number of prefetches at all, as their execution also takes some
791 time). */
792 100, /* number of parallel prefetches */
793 2, /* Branch cost */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800
801   /* AMDFAM10 has optimized REP instructions for medium-sized blocks, but for
802      very small blocks it is better to use a loop.  For large blocks, libcall can
803      do nontemporal accesses and beat an inline copy considerably.  */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
820 };
821
822 struct processor_costs bdver1_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (2), /* cost of a lea instruction */
825 COSTS_N_INSNS (1), /* variable shift costs */
826 COSTS_N_INSNS (1), /* constant shift costs */
827 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (4), /* HI */
829 COSTS_N_INSNS (3), /* SI */
830 COSTS_N_INSNS (4), /* DI */
831 COSTS_N_INSNS (5)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (35), /* HI */
835 COSTS_N_INSNS (51), /* SI */
836 COSTS_N_INSNS (83), /* DI */
837 COSTS_N_INSNS (83)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 8, /* "large" insn */
841 9, /* MOVE_RATIO */
842 4, /* cost for loading QImode using movzbl */
843 {3, 4, 3}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {3, 4, 3}, /* cost of storing integer registers */
847 4, /* cost of reg,reg fld/fst */
848 {4, 4, 12}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {6, 6, 8}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {3, 3}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {4, 4}, /* cost of storing MMX registers
856 in SImode and DImode */
857 2, /* cost of moving SSE register */
858 {4, 4, 3}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {4, 4, 5}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 3, /* MMX or SSE register to integer */
863 /* On K8:
864 MOVD reg64, xmmreg Double FSTORE 4
865 MOVD reg32, xmmreg Double FSTORE 4
866 On AMDFAM10:
867 MOVD reg64, xmmreg Double FADD 3
868 1/1 1/1
869 MOVD reg32, xmmreg Double FADD 3
870 1/1 1/1 */
871 64, /* size of l1 cache. */
872 1024, /* size of l2 cache. */
873 64, /* size of prefetch block */
874 /* New AMD processors never drop prefetches; if they cannot be performed
875      immediately, they are queued.  We set the number of simultaneous prefetches
876      to a large constant to reflect this (it probably is not a good idea not
877      to limit the number of prefetches at all, as their execution also takes some
878 time). */
879 100, /* number of parallel prefetches */
880 2, /* Branch cost */
881 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
882 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
883 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
884 COSTS_N_INSNS (2), /* cost of FABS instruction. */
885 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
886 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
887
888   /* BDVER1 has optimized REP instructions for medium-sized blocks, but for
889      very small blocks it is better to use a loop.  For large blocks, libcall
890      can do nontemporal accesses and beat an inline copy considerably.  */
891 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
892 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
893 {{libcall, {{8, loop}, {24, unrolled_loop},
894 {2048, rep_prefix_4_byte}, {-1, libcall}}},
895 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
896 4, /* scalar_stmt_cost. */
897 2, /* scalar load_cost. */
898 2, /* scalar_store_cost. */
899 6, /* vec_stmt_cost. */
900 0, /* vec_to_scalar_cost. */
901 2, /* scalar_to_vec_cost. */
902 2, /* vec_align_load_cost. */
903 2, /* vec_unalign_load_cost. */
904 2, /* vec_store_cost. */
905 2, /* cond_taken_branch_cost. */
906 1, /* cond_not_taken_branch_cost. */
907 };
908
909 static const
910 struct processor_costs pentium4_cost = {
911 COSTS_N_INSNS (1), /* cost of an add instruction */
912 COSTS_N_INSNS (3), /* cost of a lea instruction */
913 COSTS_N_INSNS (4), /* variable shift costs */
914 COSTS_N_INSNS (4), /* constant shift costs */
915 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
916 COSTS_N_INSNS (15), /* HI */
917 COSTS_N_INSNS (15), /* SI */
918 COSTS_N_INSNS (15), /* DI */
919 COSTS_N_INSNS (15)}, /* other */
920 0, /* cost of multiply per each bit set */
921 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
922 COSTS_N_INSNS (56), /* HI */
923 COSTS_N_INSNS (56), /* SI */
924 COSTS_N_INSNS (56), /* DI */
925 COSTS_N_INSNS (56)}, /* other */
926 COSTS_N_INSNS (1), /* cost of movsx */
927 COSTS_N_INSNS (1), /* cost of movzx */
928 16, /* "large" insn */
929 6, /* MOVE_RATIO */
930 2, /* cost for loading QImode using movzbl */
931 {4, 5, 4}, /* cost of loading integer registers
932 in QImode, HImode and SImode.
933 Relative to reg-reg move (2). */
934 {2, 3, 2}, /* cost of storing integer registers */
935 2, /* cost of reg,reg fld/fst */
936 {2, 2, 6}, /* cost of loading fp registers
937 in SFmode, DFmode and XFmode */
938 {4, 4, 6}, /* cost of storing fp registers
939 in SFmode, DFmode and XFmode */
940 2, /* cost of moving MMX register */
941 {2, 2}, /* cost of loading MMX registers
942 in SImode and DImode */
943 {2, 2}, /* cost of storing MMX registers
944 in SImode and DImode */
945 12, /* cost of moving SSE register */
946 {12, 12, 12}, /* cost of loading SSE registers
947 in SImode, DImode and TImode */
948 {2, 2, 8}, /* cost of storing SSE registers
949 in SImode, DImode and TImode */
950 10, /* MMX or SSE register to integer */
951 8, /* size of l1 cache. */
952 256, /* size of l2 cache. */
953 64, /* size of prefetch block */
954 6, /* number of parallel prefetches */
955 2, /* Branch cost */
956 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
957 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
958 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
959 COSTS_N_INSNS (2), /* cost of FABS instruction. */
960 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
961 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
962 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
963 DUMMY_STRINGOP_ALGS},
964 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
965 {-1, libcall}}},
966 DUMMY_STRINGOP_ALGS},
967 1, /* scalar_stmt_cost. */
968 1, /* scalar load_cost. */
969 1, /* scalar_store_cost. */
970 1, /* vec_stmt_cost. */
971 1, /* vec_to_scalar_cost. */
972 1, /* scalar_to_vec_cost. */
973 1, /* vec_align_load_cost. */
974 2, /* vec_unalign_load_cost. */
975 1, /* vec_store_cost. */
976 3, /* cond_taken_branch_cost. */
977 1, /* cond_not_taken_branch_cost. */
978 };
979
980 static const
981 struct processor_costs nocona_cost = {
982 COSTS_N_INSNS (1), /* cost of an add instruction */
983 COSTS_N_INSNS (1), /* cost of a lea instruction */
984 COSTS_N_INSNS (1), /* variable shift costs */
985 COSTS_N_INSNS (1), /* constant shift costs */
986 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
987 COSTS_N_INSNS (10), /* HI */
988 COSTS_N_INSNS (10), /* SI */
989 COSTS_N_INSNS (10), /* DI */
990 COSTS_N_INSNS (10)}, /* other */
991 0, /* cost of multiply per each bit set */
992 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
993 COSTS_N_INSNS (66), /* HI */
994 COSTS_N_INSNS (66), /* SI */
995 COSTS_N_INSNS (66), /* DI */
996 COSTS_N_INSNS (66)}, /* other */
997 COSTS_N_INSNS (1), /* cost of movsx */
998 COSTS_N_INSNS (1), /* cost of movzx */
999 16, /* "large" insn */
1000 17, /* MOVE_RATIO */
1001 4, /* cost for loading QImode using movzbl */
1002 {4, 4, 4}, /* cost of loading integer registers
1003 in QImode, HImode and SImode.
1004 Relative to reg-reg move (2). */
1005 {4, 4, 4}, /* cost of storing integer registers */
1006 3, /* cost of reg,reg fld/fst */
1007 {12, 12, 12}, /* cost of loading fp registers
1008 in SFmode, DFmode and XFmode */
1009 {4, 4, 4}, /* cost of storing fp registers
1010 in SFmode, DFmode and XFmode */
1011 6, /* cost of moving MMX register */
1012 {12, 12}, /* cost of loading MMX registers
1013 in SImode and DImode */
1014 {12, 12}, /* cost of storing MMX registers
1015 in SImode and DImode */
1016 6, /* cost of moving SSE register */
1017 {12, 12, 12}, /* cost of loading SSE registers
1018 in SImode, DImode and TImode */
1019 {12, 12, 12}, /* cost of storing SSE registers
1020 in SImode, DImode and TImode */
1021 8, /* MMX or SSE register to integer */
1022 8, /* size of l1 cache. */
1023 1024, /* size of l2 cache. */
1024 128, /* size of prefetch block */
1025 8, /* number of parallel prefetches */
1026 1, /* Branch cost */
1027 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1028 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1029 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1030 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1031 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1032 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1033 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1034 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1035 {100000, unrolled_loop}, {-1, libcall}}}},
1036 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1037 {-1, libcall}}},
1038 {libcall, {{24, loop}, {64, unrolled_loop},
1039 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1040 1, /* scalar_stmt_cost. */
1041 1, /* scalar load_cost. */
1042 1, /* scalar_store_cost. */
1043 1, /* vec_stmt_cost. */
1044 1, /* vec_to_scalar_cost. */
1045 1, /* scalar_to_vec_cost. */
1046 1, /* vec_align_load_cost. */
1047 2, /* vec_unalign_load_cost. */
1048 1, /* vec_store_cost. */
1049 3, /* cond_taken_branch_cost. */
1050 1, /* cond_not_taken_branch_cost. */
1051 };
1052
1053 static const
1054 struct processor_costs core2_cost = {
1055 COSTS_N_INSNS (1), /* cost of an add instruction */
1056 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1057 COSTS_N_INSNS (1), /* variable shift costs */
1058 COSTS_N_INSNS (1), /* constant shift costs */
1059 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1060 COSTS_N_INSNS (3), /* HI */
1061 COSTS_N_INSNS (3), /* SI */
1062 COSTS_N_INSNS (3), /* DI */
1063 COSTS_N_INSNS (3)}, /* other */
1064 0, /* cost of multiply per each bit set */
1065 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
1066 COSTS_N_INSNS (22), /* HI */
1067 COSTS_N_INSNS (22), /* SI */
1068 COSTS_N_INSNS (22), /* DI */
1069 COSTS_N_INSNS (22)}, /* other */
1070 COSTS_N_INSNS (1), /* cost of movsx */
1071 COSTS_N_INSNS (1), /* cost of movzx */
1072 8, /* "large" insn */
1073 16, /* MOVE_RATIO */
1074 2, /* cost for loading QImode using movzbl */
1075 {6, 6, 6}, /* cost of loading integer registers
1076 in QImode, HImode and SImode.
1077 Relative to reg-reg move (2). */
1078 {4, 4, 4}, /* cost of storing integer registers */
1079 2, /* cost of reg,reg fld/fst */
1080 {6, 6, 6}, /* cost of loading fp registers
1081 in SFmode, DFmode and XFmode */
1082 {4, 4, 4}, /* cost of storing fp registers
1083 in SFmode, DFmode and XFmode */
1084 2, /* cost of moving MMX register */
1085 {6, 6}, /* cost of loading MMX registers
1086 in SImode and DImode */
1087 {4, 4}, /* cost of storing MMX registers
1088 in SImode and DImode */
1089 2, /* cost of moving SSE register */
1090 {6, 6, 6}, /* cost of loading SSE registers
1091 in SImode, DImode and TImode */
1092 {4, 4, 4}, /* cost of storing SSE registers
1093 in SImode, DImode and TImode */
1094 2, /* MMX or SSE register to integer */
1095 32, /* size of l1 cache. */
1096 2048, /* size of l2 cache. */
1097 128, /* size of prefetch block */
1098 8, /* number of parallel prefetches */
1099 3, /* Branch cost */
1100 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1101 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1102 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1103 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1104 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1105 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1106 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1107 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1108 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1109 {{libcall, {{8, loop}, {15, unrolled_loop},
1110 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1111 {libcall, {{24, loop}, {32, unrolled_loop},
1112 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1113 1, /* scalar_stmt_cost. */
1114 1, /* scalar load_cost. */
1115 1, /* scalar_store_cost. */
1116 1, /* vec_stmt_cost. */
1117 1, /* vec_to_scalar_cost. */
1118 1, /* scalar_to_vec_cost. */
1119 1, /* vec_align_load_cost. */
1120 2, /* vec_unalign_load_cost. */
1121 1, /* vec_store_cost. */
1122 3, /* cond_taken_branch_cost. */
1123 1, /* cond_not_taken_branch_cost. */
1124 };
1125
1126 static const
1127 struct processor_costs atom_cost = {
1128 COSTS_N_INSNS (1), /* cost of an add instruction */
1129 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1130 COSTS_N_INSNS (1), /* variable shift costs */
1131 COSTS_N_INSNS (1), /* constant shift costs */
1132 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1133 COSTS_N_INSNS (4), /* HI */
1134 COSTS_N_INSNS (3), /* SI */
1135 COSTS_N_INSNS (4), /* DI */
1136 COSTS_N_INSNS (2)}, /* other */
1137 0, /* cost of multiply per each bit set */
1138 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1139 COSTS_N_INSNS (26), /* HI */
1140 COSTS_N_INSNS (42), /* SI */
1141 COSTS_N_INSNS (74), /* DI */
1142 COSTS_N_INSNS (74)}, /* other */
1143 COSTS_N_INSNS (1), /* cost of movsx */
1144 COSTS_N_INSNS (1), /* cost of movzx */
1145 8, /* "large" insn */
1146 17, /* MOVE_RATIO */
1147 2, /* cost for loading QImode using movzbl */
1148 {4, 4, 4}, /* cost of loading integer registers
1149 in QImode, HImode and SImode.
1150 Relative to reg-reg move (2). */
1151 {4, 4, 4}, /* cost of storing integer registers */
1152 4, /* cost of reg,reg fld/fst */
1153 {12, 12, 12}, /* cost of loading fp registers
1154 in SFmode, DFmode and XFmode */
1155 {6, 6, 8}, /* cost of storing fp registers
1156 in SFmode, DFmode and XFmode */
1157 2, /* cost of moving MMX register */
1158 {8, 8}, /* cost of loading MMX registers
1159 in SImode and DImode */
1160 {8, 8}, /* cost of storing MMX registers
1161 in SImode and DImode */
1162 2, /* cost of moving SSE register */
1163 {8, 8, 8}, /* cost of loading SSE registers
1164 in SImode, DImode and TImode */
1165 {8, 8, 8}, /* cost of storing SSE registers
1166 in SImode, DImode and TImode */
1167 5, /* MMX or SSE register to integer */
1168 32, /* size of l1 cache. */
1169 256, /* size of l2 cache. */
1170 64, /* size of prefetch block */
1171 6, /* number of parallel prefetches */
1172 3, /* Branch cost */
1173 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1174 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1175 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1176 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1177 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1178 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1179 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1180 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1181 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1182 {{libcall, {{8, loop}, {15, unrolled_loop},
1183 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1184 {libcall, {{24, loop}, {32, unrolled_loop},
1185 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1186 1, /* scalar_stmt_cost. */
1187 1, /* scalar load_cost. */
1188 1, /* scalar_store_cost. */
1189 1, /* vec_stmt_cost. */
1190 1, /* vec_to_scalar_cost. */
1191 1, /* scalar_to_vec_cost. */
1192 1, /* vec_align_load_cost. */
1193 2, /* vec_unalign_load_cost. */
1194 1, /* vec_store_cost. */
1195 3, /* cond_taken_branch_cost. */
1196 1, /* cond_not_taken_branch_cost. */
1197 };
1198
1199 /* Generic64 should produce code tuned for Nocona and K8. */
1200 static const
1201 struct processor_costs generic64_cost = {
1202 COSTS_N_INSNS (1), /* cost of an add instruction */
1203   /* On all chips taken into consideration, lea takes 2 cycles or more.  With
1204      that cost, however, our current implementation of synth_mult results in
1205      the use of unnecessary temporary registers, causing regressions on several
1206      SPECfp benchmarks.  */
1207 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1208 COSTS_N_INSNS (1), /* variable shift costs */
1209 COSTS_N_INSNS (1), /* constant shift costs */
1210 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1211 COSTS_N_INSNS (4), /* HI */
1212 COSTS_N_INSNS (3), /* SI */
1213 COSTS_N_INSNS (4), /* DI */
1214 COSTS_N_INSNS (2)}, /* other */
1215 0, /* cost of multiply per each bit set */
1216 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1217 COSTS_N_INSNS (26), /* HI */
1218 COSTS_N_INSNS (42), /* SI */
1219 COSTS_N_INSNS (74), /* DI */
1220 COSTS_N_INSNS (74)}, /* other */
1221 COSTS_N_INSNS (1), /* cost of movsx */
1222 COSTS_N_INSNS (1), /* cost of movzx */
1223 8, /* "large" insn */
1224 17, /* MOVE_RATIO */
1225 4, /* cost for loading QImode using movzbl */
1226 {4, 4, 4}, /* cost of loading integer registers
1227 in QImode, HImode and SImode.
1228 Relative to reg-reg move (2). */
1229 {4, 4, 4}, /* cost of storing integer registers */
1230 4, /* cost of reg,reg fld/fst */
1231 {12, 12, 12}, /* cost of loading fp registers
1232 in SFmode, DFmode and XFmode */
1233 {6, 6, 8}, /* cost of storing fp registers
1234 in SFmode, DFmode and XFmode */
1235 2, /* cost of moving MMX register */
1236 {8, 8}, /* cost of loading MMX registers
1237 in SImode and DImode */
1238 {8, 8}, /* cost of storing MMX registers
1239 in SImode and DImode */
1240 2, /* cost of moving SSE register */
1241 {8, 8, 8}, /* cost of loading SSE registers
1242 in SImode, DImode and TImode */
1243 {8, 8, 8}, /* cost of storing SSE registers
1244 in SImode, DImode and TImode */
1245 5, /* MMX or SSE register to integer */
1246 32, /* size of l1 cache. */
1247 512, /* size of l2 cache. */
1248 64, /* size of prefetch block */
1249 6, /* number of parallel prefetches */
1250   /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1251      value is increased to the perhaps more appropriate value of 5.  */
1252 3, /* Branch cost */
1253 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1254 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1255 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1256 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1257 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1258 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1259 {DUMMY_STRINGOP_ALGS,
1260 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1261 {DUMMY_STRINGOP_ALGS,
1262 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1263 1, /* scalar_stmt_cost. */
1264 1, /* scalar load_cost. */
1265 1, /* scalar_store_cost. */
1266 1, /* vec_stmt_cost. */
1267 1, /* vec_to_scalar_cost. */
1268 1, /* scalar_to_vec_cost. */
1269 1, /* vec_align_load_cost. */
1270 2, /* vec_unalign_load_cost. */
1271 1, /* vec_store_cost. */
1272 3, /* cond_taken_branch_cost. */
1273 1, /* cond_not_taken_branch_cost. */
1274 };
1275
1276 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1277 Athlon and K8. */
1278 static const
1279 struct processor_costs generic32_cost = {
1280 COSTS_N_INSNS (1), /* cost of an add instruction */
1281 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1282 COSTS_N_INSNS (1), /* variable shift costs */
1283 COSTS_N_INSNS (1), /* constant shift costs */
1284 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1285 COSTS_N_INSNS (4), /* HI */
1286 COSTS_N_INSNS (3), /* SI */
1287 COSTS_N_INSNS (4), /* DI */
1288 COSTS_N_INSNS (2)}, /* other */
1289 0, /* cost of multiply per each bit set */
1290 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1291 COSTS_N_INSNS (26), /* HI */
1292 COSTS_N_INSNS (42), /* SI */
1293 COSTS_N_INSNS (74), /* DI */
1294 COSTS_N_INSNS (74)}, /* other */
1295 COSTS_N_INSNS (1), /* cost of movsx */
1296 COSTS_N_INSNS (1), /* cost of movzx */
1297 8, /* "large" insn */
1298 17, /* MOVE_RATIO */
1299 4, /* cost for loading QImode using movzbl */
1300 {4, 4, 4}, /* cost of loading integer registers
1301 in QImode, HImode and SImode.
1302 Relative to reg-reg move (2). */
1303 {4, 4, 4}, /* cost of storing integer registers */
1304 4, /* cost of reg,reg fld/fst */
1305 {12, 12, 12}, /* cost of loading fp registers
1306 in SFmode, DFmode and XFmode */
1307 {6, 6, 8}, /* cost of storing fp registers
1308 in SFmode, DFmode and XFmode */
1309 2, /* cost of moving MMX register */
1310 {8, 8}, /* cost of loading MMX registers
1311 in SImode and DImode */
1312 {8, 8}, /* cost of storing MMX registers
1313 in SImode and DImode */
1314 2, /* cost of moving SSE register */
1315 {8, 8, 8}, /* cost of loading SSE registers
1316 in SImode, DImode and TImode */
1317 {8, 8, 8}, /* cost of storing SSE registers
1318 in SImode, DImode and TImode */
1319 5, /* MMX or SSE register to integer */
1320 32, /* size of l1 cache. */
1321 256, /* size of l2 cache. */
1322 64, /* size of prefetch block */
1323 6, /* number of parallel prefetches */
1324 3, /* Branch cost */
1325 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1326 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1327 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1328 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1329 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1330 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1331 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1332 DUMMY_STRINGOP_ALGS},
1333 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1334 DUMMY_STRINGOP_ALGS},
1335 1, /* scalar_stmt_cost. */
1336 1, /* scalar load_cost. */
1337 1, /* scalar_store_cost. */
1338 1, /* vec_stmt_cost. */
1339 1, /* vec_to_scalar_cost. */
1340 1, /* scalar_to_vec_cost. */
1341 1, /* vec_align_load_cost. */
1342 2, /* vec_unalign_load_cost. */
1343 1, /* vec_store_cost. */
1344 3, /* cond_taken_branch_cost. */
1345 1, /* cond_not_taken_branch_cost. */
1346 };
1347
1348 const struct processor_costs *ix86_cost = &pentium_cost;
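/* The &pentium_cost default above is just a placeholder; option handling is
   expected to repoint it.  A hedged sketch of the logic (not the verbatim
   code; see override_options/processor_target_table elsewhere in this file):

     if (optimize_size)
       ix86_cost = &ix86_size_cost;
     else
       ix86_cost = processor_target_table[ix86_tune].cost;
*/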
1349
1350 /* Processor feature/optimization bitmasks. */
1351 #define m_386 (1<<PROCESSOR_I386)
1352 #define m_486 (1<<PROCESSOR_I486)
1353 #define m_PENT (1<<PROCESSOR_PENTIUM)
1354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1355 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1356 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1357 #define m_CORE2 (1<<PROCESSOR_CORE2)
1358 #define m_ATOM (1<<PROCESSOR_ATOM)
1359
1360 #define m_GEODE (1<<PROCESSOR_GEODE)
1361 #define m_K6 (1<<PROCESSOR_K6)
1362 #define m_K6_GEODE (m_K6 | m_GEODE)
1363 #define m_K8 (1<<PROCESSOR_K8)
1364 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1365 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1366 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1367 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1368 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)
1369
1370 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1371 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1372
1373 /* Generic instruction choice should be a common subset of supported CPUs
1374 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1375 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1376
1377 /* Feature tests against the various tunings. */
1378 unsigned char ix86_tune_features[X86_TUNE_LAST];
1379
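/* A sketch of how the masks below are turned into the ix86_tune_features
   array during option processing (approximate, not the verbatim code):

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   so each initializer below is simply the set of processors for which the
   corresponding X86_TUNE_* flag should be enabled.  */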
1380 /* Feature tests against the various tunings used to create ix86_tune_features
1381 based on the processor mask. */
1382 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1383 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1384      negatively, so enabling it for Generic64 seems like a good code-size
1385      tradeoff.  We can't enable it for 32bit generic because it does not
1386      work well with PPro based chips.  */
1387 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1388
1389 /* X86_TUNE_PUSH_MEMORY */
1390 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1391 | m_NOCONA | m_CORE2 | m_GENERIC,
1392
1393 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1394 m_486 | m_PENT,
1395
1396 /* X86_TUNE_UNROLL_STRLEN */
1397 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1398 | m_CORE2 | m_GENERIC,
1399
1400 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1401 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1402
1403 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put into the P4
1404 based on simulation results, but once the P4 shipped no performance
1405 benefit was observed from them, and they also increase code size.
1406 As a result, icc never generates branch hints. */
1407 0,
1408
1409 /* X86_TUNE_DOUBLE_WITH_ADD */
1410 ~m_386,
1411
1412 /* X86_TUNE_USE_SAHF */
1413 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1414 | m_NOCONA | m_CORE2 | m_GENERIC,
1415
1416 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1417 partial dependencies. */
1418 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1419 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1420
1421 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1422 register stalls in the Generic32 compilation setting as well.  However,
1423 in the current implementation partial register stalls are not eliminated
1424 very well - they can be introduced via subregs synthesized by combine
1425 and can happen in caller/callee saving sequences.  Because this option
1426 pays back little on PPro based chips and conflicts with the partial reg
1427 dependencies used by Athlon/P4 based chips, it is better to leave it off
1428 for generic32 for now. */
1429 m_PPRO,
1430
1431 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1432 m_CORE2 | m_GENERIC,
1433
1434 /* X86_TUNE_USE_HIMODE_FIOP */
1435 m_386 | m_486 | m_K6_GEODE,
1436
1437 /* X86_TUNE_USE_SIMODE_FIOP */
1438 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1439
1440 /* X86_TUNE_USE_MOV0 */
1441 m_K6,
1442
1443 /* X86_TUNE_USE_CLTD */
1444 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1445
1446 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1447 m_PENT4,
1448
1449 /* X86_TUNE_SPLIT_LONG_MOVES */
1450 m_PPRO,
1451
1452 /* X86_TUNE_READ_MODIFY_WRITE */
1453 ~m_PENT,
1454
1455 /* X86_TUNE_READ_MODIFY */
1456 ~(m_PENT | m_PPRO),
1457
1458 /* X86_TUNE_PROMOTE_QIMODE */
1459 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1460 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1461
1462 /* X86_TUNE_FAST_PREFIX */
1463 ~(m_PENT | m_486 | m_386),
1464
1465 /* X86_TUNE_SINGLE_STRINGOP */
1466 m_386 | m_PENT4 | m_NOCONA,
1467
1468 /* X86_TUNE_QIMODE_MATH */
1469 ~0,
1470
1471 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1472 register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
1473 might be considered for Generic32 if our scheme for avoiding partial
1474 stalls were more effective. */
1475 ~m_PPRO,
1476
1477 /* X86_TUNE_PROMOTE_QI_REGS */
1478 0,
1479
1480 /* X86_TUNE_PROMOTE_HI_REGS */
1481 m_PPRO,
1482
1483 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1484 over esp addition. */
1485 m_386 | m_486 | m_PENT | m_PPRO,
1486
1487 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1488 over esp addition. */
1489 m_PENT,
1490
1491 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1492 over esp subtraction. */
1493 m_386 | m_486 | m_PENT | m_K6_GEODE,
1494
1495 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1496 over esp subtraction. */
1497 m_PENT | m_K6_GEODE,
1498
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1503
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1506
1507 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1508 conflict here between PPro/Pentium4 based chips that treat 128-bit
1509 SSE registers as single units and K8 based chips that divide SSE
1510 registers into two 64-bit halves.  This knob promotes all store
1511 destinations to 128 bits to allow register renaming on 128-bit SSE units,
1512 but usually results in one extra microop on 64-bit SSE units.
1513 Experimental results show that disabling this option on P4 brings over a
1514 20% SPECfp regression, while enabling it on K8 brings roughly a 2.4%
1515 regression that can be partly masked by careful scheduling of moves. */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
1518
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1521
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1523 m_BDVER1,
1524
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1526 m_BDVER1,
1527
1528 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1529 are resolved on SSE register parts instead of whole registers, so we may
1530 maintain just the lower part of scalar values in the proper format, leaving
1531 the upper part undefined. */
1532 m_ATHLON_K8,
1533
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1535 m_AMD_MULTIPLE,
1536
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1539
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1542
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1545
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1548
1549 /* X86_TUNE_SHIFT1 */
1550 ~m_486,
1551
1552 /* X86_TUNE_USE_FFREEP */
1553 m_AMD_MULTIPLE,
1554
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1557
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
1560
1561 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1562 than 4 branch instructions in the 16 byte window. */
1563 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1564 | m_GENERIC,
1565
1566 /* X86_TUNE_SCHEDULE */
1567 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1568 | m_GENERIC,
1569
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1572
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1575
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1578
1579 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
1580 m_ATOM,
1581
1582 /* X86_TUNE_EXT_80387_CONSTANTS */
1583 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1584 | m_CORE2 | m_GENERIC,
1585
1586 /* X86_TUNE_SHORTEN_X87_SSE */
1587 ~m_K8,
1588
1589 /* X86_TUNE_AVOID_VECTOR_DECODE */
1590 m_K8 | m_GENERIC64,
1591
1592 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1593 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1594 ~(m_386 | m_486),
1595
1596 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1597 vector path on AMD machines. */
1598 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1599
1600 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1601 machines. */
1602 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1603
1604 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1605 than a MOV. */
1606 m_PENT,
1607
1608 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1609 but one byte longer. */
1610 m_PENT,
1611
1612 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1613 operand that cannot be represented using a modRM byte.  The XOR
1614 replacement is long decoded, so this split helps here as well. */
1615 m_K6,
1616
1617 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1618 from FP to FP. */
1619 m_AMDFAM10 | m_GENERIC,
1620
1621 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1622 from integer to FP. */
1623 m_AMDFAM10,
1624
1625 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1626 with a subsequent conditional jump instruction into a single
1627 compare-and-branch uop. */
1628 m_CORE2 | m_BDVER1,
1629
1630 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1631 will impact LEA instruction selection. */
1632 m_ATOM,
1633
1634 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
1635 instructions. */
1636 ~m_ATOM,
1637 };
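/* A minimal sketch of how the table above is consumed, mirroring the code
   in ix86_option_override_internal later in this file: each entry is a mask
   of processors, and the selected -mtune processor picks one bit out of it.

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   Individual knobs are then normally tested through their TARGET_* wrappers
   from i386.h (e.g. TARGET_USE_LEAVE) rather than by indexing the array
   directly.  */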
1638
1639 /* Feature tests against the various architecture variations. */
1640 unsigned char ix86_arch_features[X86_ARCH_LAST];
1641
1642 /* Feature tests against the various architecture variations, used to create
1643 ix86_arch_features based on the processor mask. */
1644 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1645 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1646 ~(m_386 | m_486 | m_PENT | m_K6),
1647
1648 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1649 ~m_386,
1650
1651 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1652 ~(m_386 | m_486),
1653
1654 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1655 ~m_386,
1656
1657 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1658 ~m_386,
1659 };
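/* The architecture table works like the tuning one above, but is keyed off
   the -march selection rather than -mtune; sketching the intended use:

     ix86_arch_mask = 1u << ix86_arch;
     for (i = 0; i < X86_ARCH_LAST; ++i)
       ix86_arch_features[i]
         = !!(initial_ix86_arch_features[i] & ix86_arch_mask);

   so, assuming the usual TARGET_CMOVE wrapper in i386.h, conditional moves
   are not used when compiling with -march=i486.  */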
1660
1661 static const unsigned int x86_accumulate_outgoing_args
1662 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1663 | m_GENERIC;
1664
1665 static const unsigned int x86_arch_always_fancy_math_387
1666 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1667 | m_NOCONA | m_CORE2 | m_GENERIC;
1668
1669 static enum stringop_alg stringop_alg = no_stringop;
1670
1671 /* In case the average insn count for a single function invocation is
1672 lower than this constant, emit fast (but longer) prologue and
1673 epilogue code. */
1674 #define FAST_PROLOGUE_INSN_COUNT 20
1675
1676 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1677 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1678 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1679 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1680
1681 /* Array of the smallest class containing reg number REGNO, indexed by
1682 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1683
1684 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1685 {
1686 /* ax, dx, cx, bx */
1687 AREG, DREG, CREG, BREG,
1688 /* si, di, bp, sp */
1689 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1690 /* FP registers */
1691 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1692 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1693 /* arg pointer */
1694 NON_Q_REGS,
1695 /* flags, fpsr, fpcr, frame */
1696 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1697 /* SSE registers */
1698 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1699 SSE_REGS, SSE_REGS,
1700 /* MMX registers */
1701 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1702 MMX_REGS, MMX_REGS,
1703 /* REX registers */
1704 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1705 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1706 /* SSE REX registers */
1707 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1708 SSE_REGS, SSE_REGS,
1709 };
1710
1711 /* The "default" register map used in 32bit mode. */
1712
1713 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1714 {
1715 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1716 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1717 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1718 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1719 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1720 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1721 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1722 };
1723
1724 /* The "default" register map used in 64bit mode. */
1725
1726 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1727 {
1728 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1729 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1730 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1731 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1732 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1733 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1734 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1735 };
1736
1737 /* Define the register numbers to be used in Dwarf debugging information.
1738 The SVR4 reference port C compiler uses the following register numbers
1739 in its Dwarf output code:
1740 0 for %eax (gcc regno = 0)
1741 1 for %ecx (gcc regno = 2)
1742 2 for %edx (gcc regno = 1)
1743 3 for %ebx (gcc regno = 3)
1744 4 for %esp (gcc regno = 7)
1745 5 for %ebp (gcc regno = 6)
1746 6 for %esi (gcc regno = 4)
1747 7 for %edi (gcc regno = 5)
1748 The following three DWARF register numbers are never generated by
1749 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1750 believes these numbers have these meanings.
1751 8 for %eip (no gcc equivalent)
1752 9 for %eflags (gcc regno = 17)
1753 10 for %trapno (no gcc equivalent)
1754 It is not at all clear how we should number the FP stack registers
1755 for the x86 architecture. If the version of SDB on x86/svr4 were
1756 a bit less brain dead with respect to floating-point then we would
1757 have a precedent to follow with respect to DWARF register numbers
1758 for x86 FP registers, but the SDB on x86/svr4 is so completely
1759 broken with respect to FP registers that it is hardly worth thinking
1760 of it as something to strive for compatibility with.
1761 The version of x86/svr4 SDB I have at the moment does (partially)
1762 seem to believe that DWARF register number 11 is associated with
1763 the x86 register %st(0), but that's about all. Higher DWARF
1764 register numbers don't seem to be associated with anything in
1765 particular, and even for DWARF regno 11, SDB only seems to under-
1766 stand that it should say that a variable lives in %st(0) (when
1767 asked via an `=' command) if we said it was in DWARF regno 11,
1768 but SDB still prints garbage when asked for the value of the
1769 variable in question (via a `/' command).
1770 (Also note that the labels SDB prints for various FP stack regs
1771 when doing an `x' command are all wrong.)
1772 Note that these problems generally don't affect the native SVR4
1773 C compiler because it doesn't allow the use of -O with -g and
1774 because when it is *not* optimizing, it allocates a memory
1775 location for each floating-point variable, and the memory
1776 location is what gets described in the DWARF AT_location
1777 attribute for the variable in question.
1778 Regardless of the severe mental illness of the x86/svr4 SDB, we
1779 do something sensible here and we use the following DWARF
1780 register numbers. Note that these are all stack-top-relative
1781 numbers.
1782 11 for %st(0) (gcc regno = 8)
1783 12 for %st(1) (gcc regno = 9)
1784 13 for %st(2) (gcc regno = 10)
1785 14 for %st(3) (gcc regno = 11)
1786 15 for %st(4) (gcc regno = 12)
1787 16 for %st(5) (gcc regno = 13)
1788 17 for %st(6) (gcc regno = 14)
1789 18 for %st(7) (gcc regno = 15)
1790 */
1791 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1792 {
1793 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1794 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1795 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1796 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1797 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1798 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1799 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1800 };
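/* Illustration of how the map above is read: a hard register is translated
   for DWARF output by indexing with the gcc register number, e.g.

     svr4_dbx_register_map[4] == 6   /* %esi, matching "6 for %esi" above.  */

   The lookup itself is assumed to go through the DBX_REGISTER_NUMBER /
   DWARF register-number macros in i386.h.  */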
1801
1802 /* Define parameter passing and return registers. */
1803
1804 static int const x86_64_int_parameter_registers[6] =
1805 {
1806 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1807 };
1808
1809 static int const x86_64_ms_abi_int_parameter_registers[4] =
1810 {
1811 CX_REG, DX_REG, R8_REG, R9_REG
1812 };
1813
1814 static int const x86_64_int_return_registers[4] =
1815 {
1816 AX_REG, DX_REG, DI_REG, SI_REG
1817 };
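/* Worked example of the two calling conventions encoded above: for

     extern long f (long a, long b, long c, long d, long e);

   the SysV x86-64 ABI passes a..e in %rdi, %rsi, %rdx, %rcx and %r8 (the
   first five entries of x86_64_int_parameter_registers), while the MS ABI
   passes only a..d in registers (%rcx, %rdx, %r8, %r9) and e on the stack.
   Integer results come back in %rax, with %rdx holding the second 64-bit
   part of a two-register return.  */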
1818
1819 /* Define the structure for the machine field in struct function. */
1820
1821 struct GTY(()) stack_local_entry {
1822 unsigned short mode;
1823 unsigned short n;
1824 rtx rtl;
1825 struct stack_local_entry *next;
1826 };
1827
1828 /* Structure describing stack frame layout.
1829 Stack grows downward:
1830
1831 [arguments]
1832 <- ARG_POINTER
1833 saved pc
1834
1835 saved static chain if ix86_static_chain_on_stack
1836
1837 saved frame pointer if frame_pointer_needed
1838 <- HARD_FRAME_POINTER
1839 [saved regs]
1840 <- regs_save_offset
1841 [padding0]
1842
1843 [saved SSE regs]
1844 <- sse_regs_save_offset
1845 [padding1] |
1846 | <- FRAME_POINTER
1847 [va_arg registers] |
1848 |
1849 [frame] |
1850 |
1851 [padding2] | = to_allocate
1852 <- STACK_POINTER
1853 */
1854 struct ix86_frame
1855 {
1856 int nsseregs;
1857 int nregs;
1858 int va_arg_size;
1859 int red_zone_size;
1860 int outgoing_arguments_size;
1861 HOST_WIDE_INT frame;
1862
1863 /* The offsets relative to ARG_POINTER. */
1864 HOST_WIDE_INT frame_pointer_offset;
1865 HOST_WIDE_INT hard_frame_pointer_offset;
1866 HOST_WIDE_INT stack_pointer_offset;
1867 HOST_WIDE_INT reg_save_offset;
1868 HOST_WIDE_INT sse_reg_save_offset;
1869
1870 /* When save_regs_using_mov is set, emit prologue using
1871 move instead of push instructions. */
1872 bool save_regs_using_mov;
1873 };
1874
1875 /* Code model option. */
1876 enum cmodel ix86_cmodel;
1877 /* Asm dialect. */
1878 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1879 /* TLS dialects. */
1880 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1881
1882 /* Which unit we are generating floating point math for. */
1883 enum fpmath_unit ix86_fpmath;
1884
1885 /* Which cpu are we scheduling for. */
1886 enum attr_cpu ix86_schedule;
1887
1888 /* Which cpu are we optimizing for. */
1889 enum processor_type ix86_tune;
1890
1891 /* Which instruction set architecture to use. */
1892 enum processor_type ix86_arch;
1893
1894 /* true if sse prefetch instruction is not NOOP. */
1895 int x86_prefetch_sse;
1896
1897 /* ix86_regparm_string as a number */
1898 static int ix86_regparm;
1899
1900 /* -mstackrealign option */
1901 static const char ix86_force_align_arg_pointer_string[]
1902 = "force_align_arg_pointer";
1903
1904 static rtx (*ix86_gen_leave) (void);
1905 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1906 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1907 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1908 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1909 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1910 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1911 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1912 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1913 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1914
1915 /* Preferred alignment for stack boundary in bits. */
1916 unsigned int ix86_preferred_stack_boundary;
1917
1918 /* Alignment for incoming stack boundary in bits specified at
1919 command line. */
1920 static unsigned int ix86_user_incoming_stack_boundary;
1921
1922 /* Default alignment for incoming stack boundary in bits. */
1923 static unsigned int ix86_default_incoming_stack_boundary;
1924
1925 /* Alignment for incoming stack boundary in bits. */
1926 unsigned int ix86_incoming_stack_boundary;
1927
1928 /* The abi used by target. */
1929 enum calling_abi ix86_abi;
1930
1931 /* Values 1-5: see jump.c */
1932 int ix86_branch_cost;
1933
1934 /* Calling abi specific va_list type nodes. */
1935 static GTY(()) tree sysv_va_list_type_node;
1936 static GTY(()) tree ms_va_list_type_node;
1937
1938 /* Variables which are this size or smaller are put in the data/bss
1939 or ldata/lbss sections. */
1940
1941 int ix86_section_threshold = 65536;
1942
1943 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1944 char internal_label_prefix[16];
1945 int internal_label_prefix_len;
1946
1947 /* Fence to use after loop using movnt. */
1948 tree x86_mfence;
1949
1950 /* Register class used for passing a given 64-bit part of the argument.
1951 These represent the classes documented by the psABI, with the exception
1952 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1953 uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
1954
1955 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1956 whenever possible (i.e. when the upper half of the part is just padding). */
1957 enum x86_64_reg_class
1958 {
1959 X86_64_NO_CLASS,
1960 X86_64_INTEGER_CLASS,
1961 X86_64_INTEGERSI_CLASS,
1962 X86_64_SSE_CLASS,
1963 X86_64_SSESF_CLASS,
1964 X86_64_SSEDF_CLASS,
1965 X86_64_SSEUP_CLASS,
1966 X86_64_X87_CLASS,
1967 X86_64_X87UP_CLASS,
1968 X86_64_COMPLEX_X87_CLASS,
1969 X86_64_MEMORY_CLASS
1970 };
1971
1972 #define MAX_CLASSES 4
1973
1974 /* Table of constants used by fldpi, fldln2, etc.... */
1975 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1976 static bool ext_80387_constants_init = 0;
1977
1978 \f
1979 static struct machine_function * ix86_init_machine_status (void);
1980 static rtx ix86_function_value (const_tree, const_tree, bool);
1981 static bool ix86_function_value_regno_p (const unsigned int);
1982 static rtx ix86_static_chain (const_tree, bool);
1983 static int ix86_function_regparm (const_tree, const_tree);
1984 static void ix86_compute_frame_layout (struct ix86_frame *);
1985 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1986 rtx, rtx, int);
1987 static void ix86_add_new_builtins (int);
1988 static rtx ix86_expand_vec_perm_builtin (tree);
1989 static tree ix86_canonical_va_list_type (tree);
1990 static void predict_jump (int);
1991 static unsigned int split_stack_prologue_scratch_regno (void);
1992 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
1993
1994 enum ix86_function_specific_strings
1995 {
1996 IX86_FUNCTION_SPECIFIC_ARCH,
1997 IX86_FUNCTION_SPECIFIC_TUNE,
1998 IX86_FUNCTION_SPECIFIC_FPMATH,
1999 IX86_FUNCTION_SPECIFIC_MAX
2000 };
2001
2002 static char *ix86_target_string (int, int, const char *, const char *,
2003 const char *, bool);
2004 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2005 static void ix86_function_specific_save (struct cl_target_option *);
2006 static void ix86_function_specific_restore (struct cl_target_option *);
2007 static void ix86_function_specific_print (FILE *, int,
2008 struct cl_target_option *);
2009 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2010 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2011 static bool ix86_can_inline_p (tree, tree);
2012 static void ix86_set_current_function (tree);
2013 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2014
2015 static enum calling_abi ix86_function_abi (const_tree);
2016
2017 \f
2018 #ifndef SUBTARGET32_DEFAULT_CPU
2019 #define SUBTARGET32_DEFAULT_CPU "i386"
2020 #endif
2021
2022 /* The svr4 ABI for the i386 says that records and unions are returned
2023 in memory. */
2024 #ifndef DEFAULT_PCC_STRUCT_RETURN
2025 #define DEFAULT_PCC_STRUCT_RETURN 1
2026 #endif
2027
2028 /* Whether -mtune= or -march= were specified */
2029 static int ix86_tune_defaulted;
2030 static int ix86_arch_specified;
2031
2032 /* A mask of ix86_isa_flags that includes bit X if X
2033 was set or cleared on the command line. */
2034 static int ix86_isa_flags_explicit;
2035
2036 /* Define a set of ISAs which are available when a given ISA is
2037 enabled. MMX and SSE ISAs are handled separately. */
2038
2039 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2040 #define OPTION_MASK_ISA_3DNOW_SET \
2041 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2042
2043 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2044 #define OPTION_MASK_ISA_SSE2_SET \
2045 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2046 #define OPTION_MASK_ISA_SSE3_SET \
2047 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2048 #define OPTION_MASK_ISA_SSSE3_SET \
2049 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2050 #define OPTION_MASK_ISA_SSE4_1_SET \
2051 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2052 #define OPTION_MASK_ISA_SSE4_2_SET \
2053 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2054 #define OPTION_MASK_ISA_AVX_SET \
2055 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2056 #define OPTION_MASK_ISA_FMA_SET \
2057 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2058
2059 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2060 as -msse4.2. */
2061 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2062
2063 #define OPTION_MASK_ISA_SSE4A_SET \
2064 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2065 #define OPTION_MASK_ISA_FMA4_SET \
2066 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2067 | OPTION_MASK_ISA_AVX_SET)
2068 #define OPTION_MASK_ISA_XOP_SET \
2069 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2070 #define OPTION_MASK_ISA_LWP_SET \
2071 OPTION_MASK_ISA_LWP
2072
2073 /* AES and PCLMUL need SSE2 because they use xmm registers */
2074 #define OPTION_MASK_ISA_AES_SET \
2075 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2076 #define OPTION_MASK_ISA_PCLMUL_SET \
2077 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2078
2079 #define OPTION_MASK_ISA_ABM_SET \
2080 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2081
2082 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2083 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2084 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2085 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2086 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2087
2088 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2089 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2090 #define OPTION_MASK_ISA_F16C_SET \
2091 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
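/* Worked expansion of the SET macros above: enabling one ISA pulls in
   everything it depends on, e.g.

     OPTION_MASK_ISA_SSE4_2_SET
       == OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1
        | OPTION_MASK_ISA_SSSE3  | OPTION_MASK_ISA_SSE3
        | OPTION_MASK_ISA_SSE2   | OPTION_MASK_ISA_SSE

   so -msse4.2 implicitly behaves as if -msse4.1, -mssse3, -msse3, -msse2
   and -msse had been given as well (see ix86_handle_option below).  */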
2092
2093 /* Define a set of ISAs which aren't available when a given ISA is
2094 disabled. MMX and SSE ISAs are handled separately. */
2095
2096 #define OPTION_MASK_ISA_MMX_UNSET \
2097 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2098 #define OPTION_MASK_ISA_3DNOW_UNSET \
2099 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2100 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2101
2102 #define OPTION_MASK_ISA_SSE_UNSET \
2103 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2104 #define OPTION_MASK_ISA_SSE2_UNSET \
2105 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2106 #define OPTION_MASK_ISA_SSE3_UNSET \
2107 (OPTION_MASK_ISA_SSE3 \
2108 | OPTION_MASK_ISA_SSSE3_UNSET \
2109 | OPTION_MASK_ISA_SSE4A_UNSET )
2110 #define OPTION_MASK_ISA_SSSE3_UNSET \
2111 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2112 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2113 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2114 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2115 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2116 #define OPTION_MASK_ISA_AVX_UNSET \
2117 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2118 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2119 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2120
2121 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
2122 as -mno-sse4.1. */
2123 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2124
2125 #define OPTION_MASK_ISA_SSE4A_UNSET \
2126 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2127
2128 #define OPTION_MASK_ISA_FMA4_UNSET \
2129 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2130 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2131 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2132
2133 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2134 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2135 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2136 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2137 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2138 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2139 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2140 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2141
2142 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2143 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2144 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
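/* The UNSET macros work in the opposite direction: disabling an ISA also
   disables everything that depends on it.  By expansion,

     OPTION_MASK_ISA_SSE3_UNSET
       == OPTION_MASK_ISA_SSE3   | OPTION_MASK_ISA_SSSE3
        | OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2
        | OPTION_MASK_ISA_SSE4A  | OPTION_MASK_ISA_FMA4
        | OPTION_MASK_ISA_XOP    | OPTION_MASK_ISA_AVX
        | OPTION_MASK_ISA_FMA    | OPTION_MASK_ISA_F16C

   so -mno-sse3 turns off SSSE3, SSE4.x, SSE4A, FMA4, XOP, AVX, FMA and
   F16C as well.  */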
2145
2146 /* Vectorization library interface and handlers. */
2147 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2148
2149 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2150 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2151
2152 /* Processor target table, indexed by processor number */
2153 struct ptt
2154 {
2155 const struct processor_costs *cost; /* Processor costs */
2156 const int align_loop; /* Default alignments. */
2157 const int align_loop_max_skip;
2158 const int align_jump;
2159 const int align_jump_max_skip;
2160 const int align_func;
2161 };
2162
2163 static const struct ptt processor_target_table[PROCESSOR_max] =
2164 {
2165 {&i386_cost, 4, 3, 4, 3, 4},
2166 {&i486_cost, 16, 15, 16, 15, 16},
2167 {&pentium_cost, 16, 7, 16, 7, 16},
2168 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2169 {&geode_cost, 0, 0, 0, 0, 0},
2170 {&k6_cost, 32, 7, 32, 7, 32},
2171 {&athlon_cost, 16, 7, 16, 7, 16},
2172 {&pentium4_cost, 0, 0, 0, 0, 0},
2173 {&k8_cost, 16, 7, 16, 7, 16},
2174 {&nocona_cost, 0, 0, 0, 0, 0},
2175 {&core2_cost, 16, 10, 16, 10, 16},
2176 {&generic32_cost, 16, 7, 16, 7, 16},
2177 {&generic64_cost, 16, 10, 16, 10, 16},
2178 {&amdfam10_cost, 32, 24, 32, 7, 32},
2179 {&bdver1_cost, 32, 24, 32, 7, 32},
2180 {&atom_cost, 16, 7, 16, 7, 16}
2181 };
2182
2183 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2184 {
2185 "generic",
2186 "i386",
2187 "i486",
2188 "pentium",
2189 "pentium-mmx",
2190 "pentiumpro",
2191 "pentium2",
2192 "pentium3",
2193 "pentium4",
2194 "pentium-m",
2195 "prescott",
2196 "nocona",
2197 "core2",
2198 "atom",
2199 "geode",
2200 "k6",
2201 "k6-2",
2202 "k6-3",
2203 "athlon",
2204 "athlon-4",
2205 "k8",
2206 "amdfam10",
2207 "bdver1"
2208 };
2209 \f
2210 /* Return true if a red-zone is in use. */
2211
2212 static inline bool
2213 ix86_using_red_zone (void)
2214 {
2215 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2216 }
2217
2218 /* Implement TARGET_HANDLE_OPTION. */
2219
2220 static bool
2221 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2222 {
2223 switch (code)
2224 {
2225 case OPT_mmmx:
2226 if (value)
2227 {
2228 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2229 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2230 }
2231 else
2232 {
2233 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2235 }
2236 return true;
2237
2238 case OPT_m3dnow:
2239 if (value)
2240 {
2241 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2242 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2243 }
2244 else
2245 {
2246 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2247 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2248 }
2249 return true;
2250
2251 case OPT_m3dnowa:
2252 return false;
2253
2254 case OPT_msse:
2255 if (value)
2256 {
2257 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2258 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2259 }
2260 else
2261 {
2262 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2263 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2264 }
2265 return true;
2266
2267 case OPT_msse2:
2268 if (value)
2269 {
2270 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2271 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2272 }
2273 else
2274 {
2275 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2276 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2277 }
2278 return true;
2279
2280 case OPT_msse3:
2281 if (value)
2282 {
2283 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2284 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2285 }
2286 else
2287 {
2288 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2289 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2290 }
2291 return true;
2292
2293 case OPT_mssse3:
2294 if (value)
2295 {
2296 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2297 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2298 }
2299 else
2300 {
2301 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2302 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2303 }
2304 return true;
2305
2306 case OPT_msse4_1:
2307 if (value)
2308 {
2309 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2310 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2311 }
2312 else
2313 {
2314 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2315 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2316 }
2317 return true;
2318
2319 case OPT_msse4_2:
2320 if (value)
2321 {
2322 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2323 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2324 }
2325 else
2326 {
2327 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2328 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2329 }
2330 return true;
2331
2332 case OPT_mavx:
2333 if (value)
2334 {
2335 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2336 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2337 }
2338 else
2339 {
2340 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2341 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2342 }
2343 return true;
2344
2345 case OPT_mfma:
2346 if (value)
2347 {
2348 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2349 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2350 }
2351 else
2352 {
2353 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2354 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2355 }
2356 return true;
2357
2358 case OPT_msse4:
2359 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2360 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2361 return true;
2362
2363 case OPT_mno_sse4:
2364 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2365 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2366 return true;
2367
2368 case OPT_msse4a:
2369 if (value)
2370 {
2371 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2372 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2373 }
2374 else
2375 {
2376 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2377 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2378 }
2379 return true;
2380
2381 case OPT_mfma4:
2382 if (value)
2383 {
2384 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2385 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2386 }
2387 else
2388 {
2389 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2390 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2391 }
2392 return true;
2393
2394 case OPT_mxop:
2395 if (value)
2396 {
2397 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2398 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2399 }
2400 else
2401 {
2402 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2403 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2404 }
2405 return true;
2406
2407 case OPT_mlwp:
2408 if (value)
2409 {
2410 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2411 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2412 }
2413 else
2414 {
2415 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2416 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2417 }
2418 return true;
2419
2420 case OPT_mabm:
2421 if (value)
2422 {
2423 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2424 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2425 }
2426 else
2427 {
2428 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2429 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2430 }
2431 return true;
2432
2433 case OPT_mpopcnt:
2434 if (value)
2435 {
2436 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2437 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2438 }
2439 else
2440 {
2441 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2442 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2443 }
2444 return true;
2445
2446 case OPT_msahf:
2447 if (value)
2448 {
2449 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2450 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2451 }
2452 else
2453 {
2454 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2455 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2456 }
2457 return true;
2458
2459 case OPT_mcx16:
2460 if (value)
2461 {
2462 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2463 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2464 }
2465 else
2466 {
2467 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2468 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2469 }
2470 return true;
2471
2472 case OPT_mmovbe:
2473 if (value)
2474 {
2475 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2476 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2477 }
2478 else
2479 {
2480 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2481 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2482 }
2483 return true;
2484
2485 case OPT_mcrc32:
2486 if (value)
2487 {
2488 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2489 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2490 }
2491 else
2492 {
2493 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2494 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2495 }
2496 return true;
2497
2498 case OPT_maes:
2499 if (value)
2500 {
2501 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2502 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2503 }
2504 else
2505 {
2506 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2507 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2508 }
2509 return true;
2510
2511 case OPT_mpclmul:
2512 if (value)
2513 {
2514 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2515 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2516 }
2517 else
2518 {
2519 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2520 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2521 }
2522 return true;
2523
2524 case OPT_mfsgsbase:
2525 if (value)
2526 {
2527 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2528 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2529 }
2530 else
2531 {
2532 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2533 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2534 }
2535 return true;
2536
2537 case OPT_mrdrnd:
2538 if (value)
2539 {
2540 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2541 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2542 }
2543 else
2544 {
2545 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2546 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2547 }
2548 return true;
2549
2550 case OPT_mf16c:
2551 if (value)
2552 {
2553 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2554 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2555 }
2556 else
2557 {
2558 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2559 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
2560 }
2561 return true;
2562
2563 default:
2564 return true;
2565 }
2566 }
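/* A small worked example of how the handler above composes (options are
   processed left to right):

     gcc -msse4 -mno-ssse3 ...
       OPT_msse4:      ix86_isa_flags |=  OPTION_MASK_ISA_SSE4_SET;
       OPT_mssse3 (0): ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;

   leaving SSE, SSE2 and SSE3 enabled but SSSE3, SSE4.1, SSE4.2, AVX and
   their dependents disabled; both decisions are also recorded in
   ix86_isa_flags_explicit so that -march defaults applied later do not
   silently flip them back.  */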
2567 \f
2568 /* Return a string that documents the current -m options. The caller is
2569 responsible for freeing the string. */
2570
2571 static char *
2572 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2573 const char *fpmath, bool add_nl_p)
2574 {
2575 struct ix86_target_opts
2576 {
2577 const char *option; /* option string */
2578 int mask; /* isa mask options */
2579 };
2580
2581 /* This table is ordered so that options like -msse4.2, which imply
2582 preceding options, are matched first. */
2583 static struct ix86_target_opts isa_opts[] =
2584 {
2585 { "-m64", OPTION_MASK_ISA_64BIT },
2586 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2587 { "-mfma", OPTION_MASK_ISA_FMA },
2588 { "-mxop", OPTION_MASK_ISA_XOP },
2589 { "-mlwp", OPTION_MASK_ISA_LWP },
2590 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2591 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2592 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2593 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2594 { "-msse3", OPTION_MASK_ISA_SSE3 },
2595 { "-msse2", OPTION_MASK_ISA_SSE2 },
2596 { "-msse", OPTION_MASK_ISA_SSE },
2597 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2598 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2599 { "-mmmx", OPTION_MASK_ISA_MMX },
2600 { "-mabm", OPTION_MASK_ISA_ABM },
2601 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2602 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2603 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2604 { "-maes", OPTION_MASK_ISA_AES },
2605 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2606 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2607 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2608 { "-mf16c", OPTION_MASK_ISA_F16C },
2609 };
2610
2611 /* Flag options. */
2612 static struct ix86_target_opts flag_opts[] =
2613 {
2614 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2615 { "-m80387", MASK_80387 },
2616 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2617 { "-malign-double", MASK_ALIGN_DOUBLE },
2618 { "-mcld", MASK_CLD },
2619 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2620 { "-mieee-fp", MASK_IEEE_FP },
2621 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2622 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2623 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2624 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2625 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2626 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2627 { "-mno-red-zone", MASK_NO_RED_ZONE },
2628 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2629 { "-mrecip", MASK_RECIP },
2630 { "-mrtd", MASK_RTD },
2631 { "-msseregparm", MASK_SSEREGPARM },
2632 { "-mstack-arg-probe", MASK_STACK_PROBE },
2633 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2634 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2635 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2636 };
2637
2638 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2639
2640 char isa_other[40];
2641 char target_other[40];
2642 unsigned num = 0;
2643 unsigned i, j;
2644 char *ret;
2645 char *ptr;
2646 size_t len;
2647 size_t line_len;
2648 size_t sep_len;
2649
2650 memset (opts, '\0', sizeof (opts));
2651
2652 /* Add -march= option. */
2653 if (arch)
2654 {
2655 opts[num][0] = "-march=";
2656 opts[num++][1] = arch;
2657 }
2658
2659 /* Add -mtune= option. */
2660 if (tune)
2661 {
2662 opts[num][0] = "-mtune=";
2663 opts[num++][1] = tune;
2664 }
2665
2666 /* Pick out the options in isa options. */
2667 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2668 {
2669 if ((isa & isa_opts[i].mask) != 0)
2670 {
2671 opts[num++][0] = isa_opts[i].option;
2672 isa &= ~ isa_opts[i].mask;
2673 }
2674 }
2675
2676 if (isa && add_nl_p)
2677 {
2678 opts[num++][0] = isa_other;
2679 sprintf (isa_other, "(other isa: %#x)", isa);
2680 }
2681
2682 /* Add flag options. */
2683 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2684 {
2685 if ((flags & flag_opts[i].mask) != 0)
2686 {
2687 opts[num++][0] = flag_opts[i].option;
2688 flags &= ~ flag_opts[i].mask;
2689 }
2690 }
2691
2692 if (flags && add_nl_p)
2693 {
2694 opts[num++][0] = target_other;
2695 sprintf (target_other, "(other flags: %#x)", flags);
2696 }
2697
2698 /* Add -fpmath= option. */
2699 if (fpmath)
2700 {
2701 opts[num][0] = "-mfpmath=";
2702 opts[num++][1] = fpmath;
2703 }
2704
2705 /* Any options? */
2706 if (num == 0)
2707 return NULL;
2708
2709 gcc_assert (num < ARRAY_SIZE (opts));
2710
2711 /* Size the string. */
2712 len = 0;
2713 sep_len = (add_nl_p) ? 3 : 1;
2714 for (i = 0; i < num; i++)
2715 {
2716 len += sep_len;
2717 for (j = 0; j < 2; j++)
2718 if (opts[i][j])
2719 len += strlen (opts[i][j]);
2720 }
2721
2722 /* Build the string. */
2723 ret = ptr = (char *) xmalloc (len);
2724 line_len = 0;
2725
2726 for (i = 0; i < num; i++)
2727 {
2728 size_t len2[2];
2729
2730 for (j = 0; j < 2; j++)
2731 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2732
2733 if (i != 0)
2734 {
2735 *ptr++ = ' ';
2736 line_len++;
2737
2738 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2739 {
2740 *ptr++ = '\\';
2741 *ptr++ = '\n';
2742 line_len = 0;
2743 }
2744 }
2745
2746 for (j = 0; j < 2; j++)
2747 if (opts[i][j])
2748 {
2749 memcpy (ptr, opts[i][j], len2[j]);
2750 ptr += len2[j];
2751 line_len += len2[j];
2752 }
2753 }
2754
2755 *ptr = '\0';
2756 gcc_assert (ret + len >= ptr);
2757
2758 return ret;
2759 }
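/* Hypothetical use of the function above (mirroring ix86_debug_options
   below); the buffer is heap-allocated and must be freed by the caller:

     char *s = ix86_target_string (ix86_isa_flags, target_flags,
                                   "core2", "generic", "sse",
                                   false);
     if (s)
       {
         fprintf (stderr, "%s\n", s);
         free (s);
       }

   The arch/tune/fpmath strings here are only illustrative arguments.  */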
2760
2761 /* Return TRUE if software prefetching is beneficial for the
2762 given CPU. */
2763
2764 static bool
2765 software_prefetching_beneficial_p (void)
2766 {
2767 switch (ix86_tune)
2768 {
2769 case PROCESSOR_GEODE:
2770 case PROCESSOR_K6:
2771 case PROCESSOR_ATHLON:
2772 case PROCESSOR_K8:
2773 case PROCESSOR_AMDFAM10:
2774 return true;
2775
2776 default:
2777 return false;
2778 }
2779 }
2780
2781 /* Return true if profiling code should be emitted before the
2782 prologue, and false otherwise.
2783 Note: for x86 the "hotfix" case is handled with a sorry.  */
2784 static bool
2785 ix86_profile_before_prologue (void)
2786 {
2787 return flag_fentry != 0;
2788 }
2789
2790 /* Function that is callable from the debugger to print the current
2791 options. */
2792 void
2793 ix86_debug_options (void)
2794 {
2795 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2796 ix86_arch_string, ix86_tune_string,
2797 ix86_fpmath_string, true);
2798
2799 if (opts)
2800 {
2801 fprintf (stderr, "%s\n\n", opts);
2802 free (opts);
2803 }
2804 else
2805 fputs ("<no options>\n\n", stderr);
2806
2807 return;
2808 }
2809 \f
2810 /* Override various settings based on options. If MAIN_ARGS_P, the
2811 options are from the command line, otherwise they are from
2812 attributes. */
2813
2814 static void
2815 ix86_option_override_internal (bool main_args_p)
2816 {
2817 int i;
2818 unsigned int ix86_arch_mask, ix86_tune_mask;
2819 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2820 const char *prefix;
2821 const char *suffix;
2822 const char *sw;
2823
2824 /* Comes from final.c -- no real reason to change it. */
2825 #define MAX_CODE_ALIGN 16
2826
2827 enum pta_flags
2828 {
2829 PTA_SSE = 1 << 0,
2830 PTA_SSE2 = 1 << 1,
2831 PTA_SSE3 = 1 << 2,
2832 PTA_MMX = 1 << 3,
2833 PTA_PREFETCH_SSE = 1 << 4,
2834 PTA_3DNOW = 1 << 5,
2835 PTA_3DNOW_A = 1 << 6,
2836 PTA_64BIT = 1 << 7,
2837 PTA_SSSE3 = 1 << 8,
2838 PTA_CX16 = 1 << 9,
2839 PTA_POPCNT = 1 << 10,
2840 PTA_ABM = 1 << 11,
2841 PTA_SSE4A = 1 << 12,
2842 PTA_NO_SAHF = 1 << 13,
2843 PTA_SSE4_1 = 1 << 14,
2844 PTA_SSE4_2 = 1 << 15,
2845 PTA_AES = 1 << 16,
2846 PTA_PCLMUL = 1 << 17,
2847 PTA_AVX = 1 << 18,
2848 PTA_FMA = 1 << 19,
2849 PTA_MOVBE = 1 << 20,
2850 PTA_FMA4 = 1 << 21,
2851 PTA_XOP = 1 << 22,
2852 PTA_LWP = 1 << 23,
2853 PTA_FSGSBASE = 1 << 24,
2854 PTA_RDRND = 1 << 25,
2855 PTA_F16C = 1 << 26
2856 };
2857
2858 static struct pta
2859 {
2860 const char *const name; /* processor name or nickname. */
2861 const enum processor_type processor;
2862 const enum attr_cpu schedule;
2863 const unsigned /*enum pta_flags*/ flags;
2864 }
2865 const processor_alias_table[] =
2866 {
2867 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2868 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2869 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2870 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2871 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2872 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2873 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2874 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2875 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2876 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2877 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2878 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2879 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2880 PTA_MMX | PTA_SSE},
2881 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2882 PTA_MMX | PTA_SSE},
2883 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2884 PTA_MMX | PTA_SSE | PTA_SSE2},
2885 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2886 PTA_MMX | PTA_SSE | PTA_SSE2},
2887 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2888 PTA_MMX | PTA_SSE | PTA_SSE2},
2889 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2890 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2891 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2892 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2893 | PTA_CX16 | PTA_NO_SAHF},
2894 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2895 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2896 | PTA_SSSE3 | PTA_CX16},
2897 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2898 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2899 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2900 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2901 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2902 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2903 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2904 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2905 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2906 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2907 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2908 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2909 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2910 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2911 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2912 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2913 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2914 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2915 {"x86-64", PROCESSOR_K8, CPU_K8,
2916 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2917 {"k8", PROCESSOR_K8, CPU_K8,
2918 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2919 | PTA_SSE2 | PTA_NO_SAHF},
2920 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2921 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2922 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2923 {"opteron", PROCESSOR_K8, CPU_K8,
2924 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2925 | PTA_SSE2 | PTA_NO_SAHF},
2926 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2927 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2928 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2929 {"athlon64", PROCESSOR_K8, CPU_K8,
2930 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2931 | PTA_SSE2 | PTA_NO_SAHF},
2932 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2933 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2934 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2935 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2936 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2937 | PTA_SSE2 | PTA_NO_SAHF},
2938 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2939 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2940 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2941 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2942 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2943 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2944 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2945 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2946 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2947 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2948 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2949 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2950 0 /* flags are only used for -march switch. */ },
2951 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2952 PTA_64BIT /* flags are only used for -march switch. */ },
2953 };
2954
2955 int const pta_size = ARRAY_SIZE (processor_alias_table);
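/* For orientation: the matching loop further below compares ix86_arch_string
   against the "name" column and then translates the PTA_* flags into
   OPTION_MASK_ISA_* bits unless the user already set them explicitly.  For
   example, a plain

     gcc -march=amdfam10

   would select PROCESSOR_AMDFAM10 / CPU_AMDFAM10 and, absent explicit
   -m(no-)* options, enable MMX, 3DNow!, 3DNow!A, SSE through SSE3, SSE4A,
   CX16, ABM (and with it POPCNT) plus SAHF, since PTA_NO_SAHF is not set
   for that entry.  */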
2956
2957 /* Set up prefix/suffix so the error messages refer to either the command
2958 line argument, or the attribute(target). */
2959 if (main_args_p)
2960 {
2961 prefix = "-m";
2962 suffix = "";
2963 sw = "switch";
2964 }
2965 else
2966 {
2967 prefix = "option(\"";
2968 suffix = "\")";
2969 sw = "attribute";
2970 }
2971
2972 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2973 SUBTARGET_OVERRIDE_OPTIONS;
2974 #endif
2975
2976 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2977 SUBSUBTARGET_OVERRIDE_OPTIONS;
2978 #endif
2979
2980 /* -fPIC is the default for x86_64. */
2981 if (TARGET_MACHO && TARGET_64BIT)
2982 flag_pic = 2;
2983
2984 /* Need to check -mtune=generic first. */
2985 if (ix86_tune_string)
2986 {
2987 if (!strcmp (ix86_tune_string, "generic")
2988 || !strcmp (ix86_tune_string, "i686")
2989 /* As special support for cross compilers we read -mtune=native
2990 as -mtune=generic. With native compilers we won't see the
2991 -mtune=native, as it was changed by the driver. */
2992 || !strcmp (ix86_tune_string, "native"))
2993 {
2994 if (TARGET_64BIT)
2995 ix86_tune_string = "generic64";
2996 else
2997 ix86_tune_string = "generic32";
2998 }
2999 /* If this call is for setting the option attribute, allow the
3000 generic32/generic64 that was previously set. */
3001 else if (!main_args_p
3002 && (!strcmp (ix86_tune_string, "generic32")
3003 || !strcmp (ix86_tune_string, "generic64")))
3004 ;
3005 else if (!strncmp (ix86_tune_string, "generic", 7))
3006 error ("bad value (%s) for %stune=%s %s",
3007 ix86_tune_string, prefix, suffix, sw);
3008 else if (!strcmp (ix86_tune_string, "x86-64"))
3009 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3010 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3011 prefix, suffix, prefix, suffix, prefix, suffix);
3012 }
3013 else
3014 {
3015 if (ix86_arch_string)
3016 ix86_tune_string = ix86_arch_string;
3017 if (!ix86_tune_string)
3018 {
3019 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3020 ix86_tune_defaulted = 1;
3021 }
3022
3023 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3024 need to use a sensible tune option. */
3025 if (!strcmp (ix86_tune_string, "generic")
3026 || !strcmp (ix86_tune_string, "x86-64")
3027 || !strcmp (ix86_tune_string, "i686"))
3028 {
3029 if (TARGET_64BIT)
3030 ix86_tune_string = "generic64";
3031 else
3032 ix86_tune_string = "generic32";
3033 }
3034 }
3035
3036 if (ix86_stringop_string)
3037 {
3038 if (!strcmp (ix86_stringop_string, "rep_byte"))
3039 stringop_alg = rep_prefix_1_byte;
3040 else if (!strcmp (ix86_stringop_string, "libcall"))
3041 stringop_alg = libcall;
3042 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3043 stringop_alg = rep_prefix_4_byte;
3044 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3045 && TARGET_64BIT)
3046 /* rep; movq isn't available in 32-bit code. */
3047 stringop_alg = rep_prefix_8_byte;
3048 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3049 stringop_alg = loop_1_byte;
3050 else if (!strcmp (ix86_stringop_string, "loop"))
3051 stringop_alg = loop;
3052 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3053 stringop_alg = unrolled_loop;
3054 else
3055 error ("bad value (%s) for %sstringop-strategy=%s %s",
3056 ix86_stringop_string, prefix, suffix, sw);
3057 }
3058
3059 if (!ix86_arch_string)
3060 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3061 else
3062 ix86_arch_specified = 1;
3063
3064 /* Validate -mabi= value. */
3065 if (ix86_abi_string)
3066 {
3067 if (strcmp (ix86_abi_string, "sysv") == 0)
3068 ix86_abi = SYSV_ABI;
3069 else if (strcmp (ix86_abi_string, "ms") == 0)
3070 ix86_abi = MS_ABI;
3071 else
3072 error ("unknown ABI (%s) for %sabi=%s %s",
3073 ix86_abi_string, prefix, suffix, sw);
3074 }
3075 else
3076 ix86_abi = DEFAULT_ABI;
3077
3078 if (ix86_cmodel_string != 0)
3079 {
3080 if (!strcmp (ix86_cmodel_string, "small"))
3081 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3082 else if (!strcmp (ix86_cmodel_string, "medium"))
3083 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3084 else if (!strcmp (ix86_cmodel_string, "large"))
3085 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3086 else if (flag_pic)
3087 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3088 else if (!strcmp (ix86_cmodel_string, "32"))
3089 ix86_cmodel = CM_32;
3090 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3091 ix86_cmodel = CM_KERNEL;
3092 else
3093 error ("bad value (%s) for %scmodel=%s %s",
3094 ix86_cmodel_string, prefix, suffix, sw);
3095 }
3096 else
3097 {
3098 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3099 use of rip-relative addressing. This eliminates fixups that
3100 would otherwise be needed if this object is to be placed in a
3101 DLL, and is essentially just as efficient as direct addressing. */
3102 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3103 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3104 else if (TARGET_64BIT)
3105 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3106 else
3107 ix86_cmodel = CM_32;
3108 }
3109 if (ix86_asm_string != 0)
3110 {
3111 if (! TARGET_MACHO
3112 && !strcmp (ix86_asm_string, "intel"))
3113 ix86_asm_dialect = ASM_INTEL;
3114 else if (!strcmp (ix86_asm_string, "att"))
3115 ix86_asm_dialect = ASM_ATT;
3116 else
3117 error ("bad value (%s) for %sasm=%s %s",
3118 ix86_asm_string, prefix, suffix, sw);
3119 }
3120 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3121 error ("code model %qs not supported in the %s bit mode",
3122 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3123 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3124 sorry ("%i-bit mode not compiled in",
3125 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3126
3127 for (i = 0; i < pta_size; i++)
3128 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3129 {
3130 ix86_schedule = processor_alias_table[i].schedule;
3131 ix86_arch = processor_alias_table[i].processor;
3132 /* Default cpu tuning to the architecture. */
3133 ix86_tune = ix86_arch;
3134
3135 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3136 error ("CPU you selected does not support x86-64 "
3137 "instruction set");
3138
3139 if (processor_alias_table[i].flags & PTA_MMX
3140 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3141 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3142 if (processor_alias_table[i].flags & PTA_3DNOW
3143 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3144 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3145 if (processor_alias_table[i].flags & PTA_3DNOW_A
3146 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3147 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3148 if (processor_alias_table[i].flags & PTA_SSE
3149 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3150 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3151 if (processor_alias_table[i].flags & PTA_SSE2
3152 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3153 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3154 if (processor_alias_table[i].flags & PTA_SSE3
3155 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3156 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3157 if (processor_alias_table[i].flags & PTA_SSSE3
3158 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3159 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3160 if (processor_alias_table[i].flags & PTA_SSE4_1
3161 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3162 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3163 if (processor_alias_table[i].flags & PTA_SSE4_2
3164 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3165 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3166 if (processor_alias_table[i].flags & PTA_AVX
3167 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3168 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3169 if (processor_alias_table[i].flags & PTA_FMA
3170 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3171 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3172 if (processor_alias_table[i].flags & PTA_SSE4A
3173 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3174 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3175 if (processor_alias_table[i].flags & PTA_FMA4
3176 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3177 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3178 if (processor_alias_table[i].flags & PTA_XOP
3179 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3180 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3181 if (processor_alias_table[i].flags & PTA_LWP
3182 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3183 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3184 if (processor_alias_table[i].flags & PTA_ABM
3185 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3186 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3187 if (processor_alias_table[i].flags & PTA_CX16
3188 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3189 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3190 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3191 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3192 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3193 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3194 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3195 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3196 if (processor_alias_table[i].flags & PTA_MOVBE
3197 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3198 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3199 if (processor_alias_table[i].flags & PTA_AES
3200 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3201 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3202 if (processor_alias_table[i].flags & PTA_PCLMUL
3203 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3204 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3205 if (processor_alias_table[i].flags & PTA_FSGSBASE
3206 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3207 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3208 if (processor_alias_table[i].flags & PTA_RDRND
3209 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3210 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3211 if (processor_alias_table[i].flags & PTA_F16C
3212 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3213 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3214 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3215 x86_prefetch_sse = true;
3216
3217 break;
3218 }
3219
3220 if (!strcmp (ix86_arch_string, "generic"))
3221 error ("generic CPU can be used only for %stune=%s %s",
3222 prefix, suffix, sw);
3223 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3224 error ("bad value (%s) for %sarch=%s %s",
3225 ix86_arch_string, prefix, suffix, sw);
3226
3227 ix86_arch_mask = 1u << ix86_arch;
3228 for (i = 0; i < X86_ARCH_LAST; ++i)
3229 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3230
3231 for (i = 0; i < pta_size; i++)
3232 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3233 {
3234 ix86_schedule = processor_alias_table[i].schedule;
3235 ix86_tune = processor_alias_table[i].processor;
3236 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3237 {
3238 if (ix86_tune_defaulted)
3239 {
3240 ix86_tune_string = "x86-64";
3241 for (i = 0; i < pta_size; i++)
3242 if (! strcmp (ix86_tune_string,
3243 processor_alias_table[i].name))
3244 break;
3245 ix86_schedule = processor_alias_table[i].schedule;
3246 ix86_tune = processor_alias_table[i].processor;
3247 }
3248 else
3249 error ("CPU you selected does not support x86-64 "
3250 "instruction set");
3251 }
3252 /* Intel CPUs have always interpreted SSE prefetch instructions as
3253 NOPs; so, we can enable SSE prefetch instructions even when
3254 -mtune (rather than -march) points us to a processor that has them.
3255 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3256 higher processors. */
3257 if (TARGET_CMOVE
3258 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3259 x86_prefetch_sse = true;
3260 break;
3261 }
3262
3263 if (ix86_tune_specified && i == pta_size)
3264 error ("bad value (%s) for %stune=%s %s",
3265 ix86_tune_string, prefix, suffix, sw);
3266
3267 ix86_tune_mask = 1u << ix86_tune;
3268 for (i = 0; i < X86_TUNE_LAST; ++i)
3269 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3270
3271 #ifndef USE_IX86_FRAME_POINTER
3272 #define USE_IX86_FRAME_POINTER 0
3273 #endif
3274
3275 /* Set the default values for switches whose default depends on TARGET_64BIT
3276 in case they weren't overwritten by command line options. */
3277 if (TARGET_64BIT)
3278 {
3279 if (flag_zee == 2)
3280 flag_zee = 1;
3281 if (flag_omit_frame_pointer == 2)
3282 flag_omit_frame_pointer = 1;
3283 if (flag_asynchronous_unwind_tables == 2)
3284 flag_asynchronous_unwind_tables = 1;
3285 if (flag_pcc_struct_return == 2)
3286 flag_pcc_struct_return = 0;
3287 }
3288 else
3289 {
3290 if (flag_zee == 2)
3291 flag_zee = 0;
3292 if (flag_omit_frame_pointer == 2)
3293 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3294 if (flag_asynchronous_unwind_tables == 2)
3295 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3296 if (flag_pcc_struct_return == 2)
3297 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3298 }
3299
3300 if (optimize_size)
3301 ix86_cost = &ix86_size_cost;
3302 else
3303 ix86_cost = processor_target_table[ix86_tune].cost;
3304
3305 /* Arrange to set up i386_stack_locals for all functions. */
3306 init_machine_status = ix86_init_machine_status;
3307
3308 /* Validate -mregparm= value. */
3309 if (ix86_regparm_string)
3310 {
3311 if (TARGET_64BIT)
3312 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3313 i = atoi (ix86_regparm_string);
3314 if (i < 0 || i > REGPARM_MAX)
3315 error ("%sregparm=%d%s is not between 0 and %d",
3316 prefix, i, suffix, REGPARM_MAX);
3317 else
3318 ix86_regparm = i;
3319 }
3320 if (TARGET_64BIT)
3321 ix86_regparm = REGPARM_MAX;
3322
3323 /* If the user has provided any of the -malign-* options,
3324 warn and use that value only if -falign-* is not set.
3325 Remove this code in GCC 3.2 or later. */
3326 if (ix86_align_loops_string)
3327 {
3328 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3329 prefix, suffix, suffix);
3330 if (align_loops == 0)
3331 {
3332 i = atoi (ix86_align_loops_string);
3333 if (i < 0 || i > MAX_CODE_ALIGN)
3334 error ("%salign-loops=%d%s is not between 0 and %d",
3335 prefix, i, suffix, MAX_CODE_ALIGN);
3336 else
3337 align_loops = 1 << i;
3338 }
3339 }
3340
3341 if (ix86_align_jumps_string)
3342 {
3343 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3344 prefix, suffix, suffix);
3345 if (align_jumps == 0)
3346 {
3347 i = atoi (ix86_align_jumps_string);
3348 if (i < 0 || i > MAX_CODE_ALIGN)
3349 error ("%salign-loops=%d%s is not between 0 and %d",
3350 prefix, i, suffix, MAX_CODE_ALIGN);
3351 else
3352 align_jumps = 1 << i;
3353 }
3354 }
3355
3356 if (ix86_align_funcs_string)
3357 {
3358 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3359 prefix, suffix, suffix);
3360 if (align_functions == 0)
3361 {
3362 i = atoi (ix86_align_funcs_string);
3363 if (i < 0 || i > MAX_CODE_ALIGN)
3364 error ("%salign-loops=%d%s is not between 0 and %d",
3365 prefix, i, suffix, MAX_CODE_ALIGN);
3366 else
3367 align_functions = 1 << i;
3368 }
3369 }
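/* Illustrative sketch (not part of this file): the obsolete -malign-*
   options take a power-of-two exponent (hence the "1 << i" above), while
   the generic -falign-* options take a byte count, so these two command
   lines request the same 16-byte function alignment:

       gcc -O2 -malign-functions=4 unit.c     (deprecated spelling)
       gcc -O2 -falign-functions=16 unit.c    (preferred spelling)

   "unit.c" is a placeholder file name.  */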
3370
3371 /* Default align_* from the processor table. */
3372 if (align_loops == 0)
3373 {
3374 align_loops = processor_target_table[ix86_tune].align_loop;
3375 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3376 }
3377 if (align_jumps == 0)
3378 {
3379 align_jumps = processor_target_table[ix86_tune].align_jump;
3380 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3381 }
3382 if (align_functions == 0)
3383 {
3384 align_functions = processor_target_table[ix86_tune].align_func;
3385 }
3386
3387 /* Validate -mbranch-cost= value, or provide default. */
3388 ix86_branch_cost = ix86_cost->branch_cost;
3389 if (ix86_branch_cost_string)
3390 {
3391 i = atoi (ix86_branch_cost_string);
3392 if (i < 0 || i > 5)
3393 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3394 else
3395 ix86_branch_cost = i;
3396 }
3397 if (ix86_section_threshold_string)
3398 {
3399 i = atoi (ix86_section_threshold_string);
3400 if (i < 0)
3401 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3402 else
3403 ix86_section_threshold = i;
3404 }
3405
3406 if (ix86_tls_dialect_string)
3407 {
3408 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3409 ix86_tls_dialect = TLS_DIALECT_GNU;
3410 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3411 ix86_tls_dialect = TLS_DIALECT_GNU2;
3412 else
3413 error ("bad value (%s) for %stls-dialect=%s %s",
3414 ix86_tls_dialect_string, prefix, suffix, sw);
3415 }
3416
3417 if (ix87_precision_string)
3418 {
3419 i = atoi (ix87_precision_string);
3420 if (i != 32 && i != 64 && i != 80)
3421 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3422 }
3423
3424 if (TARGET_64BIT)
3425 {
3426 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3427
3428 /* Enable by default the SSE and MMX builtins. Do allow the user to
3429 explicitly disable any of these. In particular, disabling SSE and
3430 MMX for kernel code is extremely useful. */
3431 if (!ix86_arch_specified)
3432 ix86_isa_flags
3433 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3434 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3435
3436 if (TARGET_RTD)
3437 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3438 }
3439 else
3440 {
3441 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3442
3443 if (!ix86_arch_specified)
3444 ix86_isa_flags
3445 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3446
3447 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3448 when the programmer takes care to keep the stack from being destroyed. */
3449 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3450 target_flags |= MASK_NO_RED_ZONE;
3451 }
3452
3453 /* Keep nonleaf frame pointers. */
3454 if (flag_omit_frame_pointer)
3455 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3456 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3457 flag_omit_frame_pointer = 1;
3458
3459 /* If we're doing fast math, we don't care about comparison order
3460 wrt NaNs. This lets us use a shorter comparison sequence. */
3461 if (flag_finite_math_only)
3462 target_flags &= ~MASK_IEEE_FP;
3463
3464 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3465 since the insns won't need emulation. */
3466 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3467 target_flags &= ~MASK_NO_FANCY_MATH_387;
3468
3469 /* Likewise, if the target doesn't have a 387, or we've specified
3470 software floating point, don't use 387 inline intrinsics. */
3471 if (!TARGET_80387)
3472 target_flags |= MASK_NO_FANCY_MATH_387;
3473
3474 /* Turn on MMX builtins for -msse. */
3475 if (TARGET_SSE)
3476 {
3477 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3478 x86_prefetch_sse = true;
3479 }
3480
3481 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3482 if (TARGET_SSE4_2 || TARGET_ABM)
3483 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3484
3485 /* Validate -mpreferred-stack-boundary= value or default it to
3486 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3487 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3488 if (ix86_preferred_stack_boundary_string)
3489 {
3490 i = atoi (ix86_preferred_stack_boundary_string);
3491 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3492 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3493 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3494 else
3495 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3496 }
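/* Illustrative sketch (not part of this file): the option value is a log2
   byte count, so "-mpreferred-stack-boundary=4" gives
   (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. the usual 16-byte stack
   alignment of the 64-bit ABI.  */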
3497
3498 /* Set the default value for -mstackrealign. */
3499 if (ix86_force_align_arg_pointer == -1)
3500 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3501
3502 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3503
3504 /* Validate -mincoming-stack-boundary= value or default it to
3505 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3506 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3507 if (ix86_incoming_stack_boundary_string)
3508 {
3509 i = atoi (ix86_incoming_stack_boundary_string);
3510 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3511 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3512 i, TARGET_64BIT ? 4 : 2);
3513 else
3514 {
3515 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3516 ix86_incoming_stack_boundary
3517 = ix86_user_incoming_stack_boundary;
3518 }
3519 }
3520
3521 /* Accept -msseregparm only if at least SSE support is enabled. */
3522 if (TARGET_SSEREGPARM
3523 && ! TARGET_SSE)
3524 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3525
3526 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3527 if (ix86_fpmath_string != 0)
3528 {
3529 if (! strcmp (ix86_fpmath_string, "387"))
3530 ix86_fpmath = FPMATH_387;
3531 else if (! strcmp (ix86_fpmath_string, "sse"))
3532 {
3533 if (!TARGET_SSE)
3534 {
3535 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3536 ix86_fpmath = FPMATH_387;
3537 }
3538 else
3539 ix86_fpmath = FPMATH_SSE;
3540 }
3541 else if (! strcmp (ix86_fpmath_string, "387,sse")
3542 || ! strcmp (ix86_fpmath_string, "387+sse")
3543 || ! strcmp (ix86_fpmath_string, "sse,387")
3544 || ! strcmp (ix86_fpmath_string, "sse+387")
3545 || ! strcmp (ix86_fpmath_string, "both"))
3546 {
3547 if (!TARGET_SSE)
3548 {
3549 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3550 ix86_fpmath = FPMATH_387;
3551 }
3552 else if (!TARGET_80387)
3553 {
3554 warning (0, "387 instruction set disabled, using SSE arithmetics");
3555 ix86_fpmath = FPMATH_SSE;
3556 }
3557 else
3558 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3559 }
3560 else
3561 error ("bad value (%s) for %sfpmath=%s %s",
3562 ix86_fpmath_string, prefix, suffix, sw);
3563 }
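/* Illustrative sketch (not part of this file): the strings accepted above
   correspond to the -mfpmath= option, e.g.

       gcc -O2 -msse2 -mfpmath=sse unit.c     (scalar FP math in SSE regs)
       gcc -O2 -mfpmath=both unit.c           (allow both SSE and the 387)

   "unit.c" is a placeholder; as checked above, the combined setting falls
   back to one unit when the other is disabled.  */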
3564
3565 /* If the i387 is disabled, then do not return values in it. */
3566 if (!TARGET_80387)
3567 target_flags &= ~MASK_FLOAT_RETURNS;
3568
3569 /* Use external vectorized library in vectorizing intrinsics. */
3570 if (ix86_veclibabi_string)
3571 {
3572 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3573 ix86_veclib_handler = ix86_veclibabi_svml;
3574 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3575 ix86_veclib_handler = ix86_veclibabi_acml;
3576 else
3577 error ("unknown vectorization library ABI type (%s) for "
3578 "%sveclibabi=%s %s", ix86_veclibabi_string,
3579 prefix, suffix, sw);
3580 }
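/* Illustrative sketch (not part of this file): with
   "-mveclibabi=svml -ftree-vectorize -ffast-math" the vectorizer may turn
   a loop like

       void
       apply_sin (double *x, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           x[i] = __builtin_sin (x[i]);
       }

   into calls to the SVML vector math routines; the function is
   hypothetical and the SVML library has to be linked in separately.  */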
3581
3582 if ((!USE_IX86_FRAME_POINTER
3583 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3584 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3585 && !optimize_size)
3586 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3587
3588 /* ??? Unwind info is not correct around the CFG unless either a frame
3589 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3590 unwind info generation to be aware of the CFG and propagating states
3591 around edges. */
3592 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3593 || flag_exceptions || flag_non_call_exceptions)
3594 && flag_omit_frame_pointer
3595 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3596 {
3597 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3598 warning (0, "unwind tables currently require either a frame pointer "
3599 "or %saccumulate-outgoing-args%s for correctness",
3600 prefix, suffix);
3601 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3602 }
3603
3604 /* If stack probes are required, the space used for large function
3605 arguments on the stack must also be probed, so enable
3606 -maccumulate-outgoing-args so this happens in the prologue. */
3607 if (TARGET_STACK_PROBE
3608 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3609 {
3610 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3611 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3612 "for correctness", prefix, suffix);
3613 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3614 }
3615
3616 /* For sane SSE instruction set generation we need the fcomi instruction.
3617 It is safe to enable all CMOVE instructions. */
3618 if (TARGET_SSE)
3619 TARGET_CMOVE = 1;
3620
3621 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3622 {
3623 char *p;
3624 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3625 p = strchr (internal_label_prefix, 'X');
3626 internal_label_prefix_len = p - internal_label_prefix;
3627 *p = '\0';
3628 }
3629
3630 /* When the scheduling description is not available, disable the scheduler pass
3631 so it won't slow down the compilation and make x87 code slower. */
3632 if (!TARGET_SCHEDULE)
3633 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3634
3635 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3636 set_param_value ("simultaneous-prefetches",
3637 ix86_cost->simultaneous_prefetches);
3638 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3639 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3640 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3641 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3642 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3643 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3644
3645 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3646 if (flag_prefetch_loop_arrays < 0
3647 && HAVE_prefetch
3648 && optimize >= 3
3649 && software_prefetching_beneficial_p ())
3650 flag_prefetch_loop_arrays = 1;
3651
3652 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3653 can be optimized to ap = __builtin_next_arg (0). */
3654 if (!TARGET_64BIT && !flag_split_stack)
3655 targetm.expand_builtin_va_start = NULL;
3656
3657 if (TARGET_64BIT)
3658 {
3659 ix86_gen_leave = gen_leave_rex64;
3660 ix86_gen_add3 = gen_adddi3;
3661 ix86_gen_sub3 = gen_subdi3;
3662 ix86_gen_sub3_carry = gen_subdi3_carry;
3663 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3664 ix86_gen_monitor = gen_sse3_monitor64;
3665 ix86_gen_andsp = gen_anddi3;
3666 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3667 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3668 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3669 }
3670 else
3671 {
3672 ix86_gen_leave = gen_leave;
3673 ix86_gen_add3 = gen_addsi3;
3674 ix86_gen_sub3 = gen_subsi3;
3675 ix86_gen_sub3_carry = gen_subsi3_carry;
3676 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3677 ix86_gen_monitor = gen_sse3_monitor;
3678 ix86_gen_andsp = gen_andsi3;
3679 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3680 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3681 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3682 }
3683
3684 #ifdef USE_IX86_CLD
3685 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3686 if (!TARGET_64BIT)
3687 target_flags |= MASK_CLD & ~target_flags_explicit;
3688 #endif
3689
3690 if (!TARGET_64BIT && flag_pic)
3691 {
3692 if (flag_fentry > 0)
3693 sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
3694 flag_fentry = 0;
3695 }
3696 if (flag_fentry < 0)
3697 {
3698 #if defined(PROFILE_BEFORE_PROLOGUE)
3699 flag_fentry = 1;
3700 #else
3701 flag_fentry = 0;
3702 #endif
3703 }
3704
3705 /* Save the initial options in case the user uses function specific options. */
3706 if (main_args_p)
3707 target_option_default_node = target_option_current_node
3708 = build_target_option_node ();
3709 }
3710
3711 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3712
3713 static void
3714 ix86_option_override (void)
3715 {
3716 ix86_option_override_internal (true);
3717 }
3718
3719 /* Update register usage after having seen the compiler flags. */
3720
3721 void
3722 ix86_conditional_register_usage (void)
3723 {
3724 int i;
3725 unsigned int j;
3726
3727 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3728 {
3729 if (fixed_regs[i] > 1)
3730 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3731 if (call_used_regs[i] > 1)
3732 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3733 }
3734
3735 /* The PIC register, if it exists, is fixed. */
3736 j = PIC_OFFSET_TABLE_REGNUM;
3737 if (j != INVALID_REGNUM)
3738 fixed_regs[j] = call_used_regs[j] = 1;
3739
3740 /* The MS_ABI changes the set of call-used registers. */
3741 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3742 {
3743 call_used_regs[SI_REG] = 0;
3744 call_used_regs[DI_REG] = 0;
3745 call_used_regs[XMM6_REG] = 0;
3746 call_used_regs[XMM7_REG] = 0;
3747 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3748 call_used_regs[i] = 0;
3749 }
3750
3751 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3752 other call-clobbered regs for 64-bit. */
3753 if (TARGET_64BIT)
3754 {
3755 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3756
3757 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3758 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3759 && call_used_regs[i])
3760 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3761 }
3762
3763 /* If MMX is disabled, squash the registers. */
3764 if (! TARGET_MMX)
3765 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3766 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3767 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3768
3769 /* If SSE is disabled, squash the registers. */
3770 if (! TARGET_SSE)
3771 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3772 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3773 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3774
3775 /* If the FPU is disabled, squash the registers. */
3776 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3777 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3778 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3779 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3780
3781 /* If 32-bit, squash the 64-bit registers. */
3782 if (! TARGET_64BIT)
3783 {
3784 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3785 reg_names[i] = "";
3786 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3787 reg_names[i] = "";
3788 }
3789 }
3790
3791 \f
3792 /* Save the current options */
3793
3794 static void
3795 ix86_function_specific_save (struct cl_target_option *ptr)
3796 {
3797 ptr->arch = ix86_arch;
3798 ptr->schedule = ix86_schedule;
3799 ptr->tune = ix86_tune;
3800 ptr->fpmath = ix86_fpmath;
3801 ptr->branch_cost = ix86_branch_cost;
3802 ptr->tune_defaulted = ix86_tune_defaulted;
3803 ptr->arch_specified = ix86_arch_specified;
3804 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3805 ptr->ix86_target_flags_explicit = target_flags_explicit;
3806
3807 /* The fields are char but the variables are not; make sure the
3808 values fit in the fields. */
3809 gcc_assert (ptr->arch == ix86_arch);
3810 gcc_assert (ptr->schedule == ix86_schedule);
3811 gcc_assert (ptr->tune == ix86_tune);
3812 gcc_assert (ptr->fpmath == ix86_fpmath);
3813 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3814 }
3815
3816 /* Restore the current options */
3817
3818 static void
3819 ix86_function_specific_restore (struct cl_target_option *ptr)
3820 {
3821 enum processor_type old_tune = ix86_tune;
3822 enum processor_type old_arch = ix86_arch;
3823 unsigned int ix86_arch_mask, ix86_tune_mask;
3824 int i;
3825
3826 ix86_arch = (enum processor_type) ptr->arch;
3827 ix86_schedule = (enum attr_cpu) ptr->schedule;
3828 ix86_tune = (enum processor_type) ptr->tune;
3829 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3830 ix86_branch_cost = ptr->branch_cost;
3831 ix86_tune_defaulted = ptr->tune_defaulted;
3832 ix86_arch_specified = ptr->arch_specified;
3833 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3834 target_flags_explicit = ptr->ix86_target_flags_explicit;
3835
3836 /* Recreate the arch feature tests if the arch changed */
3837 if (old_arch != ix86_arch)
3838 {
3839 ix86_arch_mask = 1u << ix86_arch;
3840 for (i = 0; i < X86_ARCH_LAST; ++i)
3841 ix86_arch_features[i]
3842 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3843 }
3844
3845 /* Recreate the tune optimization tests */
3846 if (old_tune != ix86_tune)
3847 {
3848 ix86_tune_mask = 1u << ix86_tune;
3849 for (i = 0; i < X86_TUNE_LAST; ++i)
3850 ix86_tune_features[i]
3851 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3852 }
3853 }
3854
3855 /* Print the current options */
3856
3857 static void
3858 ix86_function_specific_print (FILE *file, int indent,
3859 struct cl_target_option *ptr)
3860 {
3861 char *target_string
3862 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
3863 NULL, NULL, NULL, false);
3864
3865 fprintf (file, "%*sarch = %d (%s)\n",
3866 indent, "",
3867 ptr->arch,
3868 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3869 ? cpu_names[ptr->arch]
3870 : "<unknown>"));
3871
3872 fprintf (file, "%*stune = %d (%s)\n",
3873 indent, "",
3874 ptr->tune,
3875 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3876 ? cpu_names[ptr->tune]
3877 : "<unknown>"));
3878
3879 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3880 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3881 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3882 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3883
3884 if (target_string)
3885 {
3886 fprintf (file, "%*s%s\n", indent, "", target_string);
3887 free (target_string);
3888 }
3889 }
3890
3891 \f
3892 /* Inner function to process the attribute((target(...))); it takes an argument
3893 and sets the current options from that argument. If we have a list, recursively go
3894 over the list. */
3895
3896 static bool
3897 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3898 {
3899 char *next_optstr;
3900 bool ret = true;
3901
3902 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3903 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3904 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3905 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3906
3907 enum ix86_opt_type
3908 {
3909 ix86_opt_unknown,
3910 ix86_opt_yes,
3911 ix86_opt_no,
3912 ix86_opt_str,
3913 ix86_opt_isa
3914 };
3915
3916 static const struct
3917 {
3918 const char *string;
3919 size_t len;
3920 enum ix86_opt_type type;
3921 int opt;
3922 int mask;
3923 } attrs[] = {
3924 /* isa options */
3925 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3926 IX86_ATTR_ISA ("abm", OPT_mabm),
3927 IX86_ATTR_ISA ("aes", OPT_maes),
3928 IX86_ATTR_ISA ("avx", OPT_mavx),
3929 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3930 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3931 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3932 IX86_ATTR_ISA ("sse", OPT_msse),
3933 IX86_ATTR_ISA ("sse2", OPT_msse2),
3934 IX86_ATTR_ISA ("sse3", OPT_msse3),
3935 IX86_ATTR_ISA ("sse4", OPT_msse4),
3936 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3937 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3938 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3939 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3940 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3941 IX86_ATTR_ISA ("xop", OPT_mxop),
3942 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3943 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3944 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3945 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3946
3947 /* string options */
3948 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3949 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3950 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3951
3952 /* flag options */
3953 IX86_ATTR_YES ("cld",
3954 OPT_mcld,
3955 MASK_CLD),
3956
3957 IX86_ATTR_NO ("fancy-math-387",
3958 OPT_mfancy_math_387,
3959 MASK_NO_FANCY_MATH_387),
3960
3961 IX86_ATTR_YES ("ieee-fp",
3962 OPT_mieee_fp,
3963 MASK_IEEE_FP),
3964
3965 IX86_ATTR_YES ("inline-all-stringops",
3966 OPT_minline_all_stringops,
3967 MASK_INLINE_ALL_STRINGOPS),
3968
3969 IX86_ATTR_YES ("inline-stringops-dynamically",
3970 OPT_minline_stringops_dynamically,
3971 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3972
3973 IX86_ATTR_NO ("align-stringops",
3974 OPT_mno_align_stringops,
3975 MASK_NO_ALIGN_STRINGOPS),
3976
3977 IX86_ATTR_YES ("recip",
3978 OPT_mrecip,
3979 MASK_RECIP),
3980
3981 };
3982
3983 /* If this is a list, recurse to get the options. */
3984 if (TREE_CODE (args) == TREE_LIST)
3985 {
3986 bool ret = true;
3987
3988 for (; args; args = TREE_CHAIN (args))
3989 if (TREE_VALUE (args)
3990 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3991 ret = false;
3992
3993 return ret;
3994 }
3995
3996 else if (TREE_CODE (args) != STRING_CST)
3997 gcc_unreachable ();
3998
3999 /* Handle multiple arguments separated by commas. */
4000 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4001
4002 while (next_optstr && *next_optstr != '\0')
4003 {
4004 char *p = next_optstr;
4005 char *orig_p = p;
4006 char *comma = strchr (next_optstr, ',');
4007 const char *opt_string;
4008 size_t len, opt_len;
4009 int opt;
4010 bool opt_set_p;
4011 char ch;
4012 unsigned i;
4013 enum ix86_opt_type type = ix86_opt_unknown;
4014 int mask = 0;
4015
4016 if (comma)
4017 {
4018 *comma = '\0';
4019 len = comma - next_optstr;
4020 next_optstr = comma + 1;
4021 }
4022 else
4023 {
4024 len = strlen (p);
4025 next_optstr = NULL;
4026 }
4027
4028 /* Recognize no-xxx. */
4029 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4030 {
4031 opt_set_p = false;
4032 p += 3;
4033 len -= 3;
4034 }
4035 else
4036 opt_set_p = true;
4037
4038 /* Find the option. */
4039 ch = *p;
4040 opt = N_OPTS;
4041 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4042 {
4043 type = attrs[i].type;
4044 opt_len = attrs[i].len;
4045 if (ch == attrs[i].string[0]
4046 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4047 && memcmp (p, attrs[i].string, opt_len) == 0)
4048 {
4049 opt = attrs[i].opt;
4050 mask = attrs[i].mask;
4051 opt_string = attrs[i].string;
4052 break;
4053 }
4054 }
4055
4056 /* Process the option. */
4057 if (opt == N_OPTS)
4058 {
4059 error ("attribute(target(\"%s\")) is unknown", orig_p);
4060 ret = false;
4061 }
4062
4063 else if (type == ix86_opt_isa)
4064 ix86_handle_option (opt, p, opt_set_p);
4065
4066 else if (type == ix86_opt_yes || type == ix86_opt_no)
4067 {
4068 if (type == ix86_opt_no)
4069 opt_set_p = !opt_set_p;
4070
4071 if (opt_set_p)
4072 target_flags |= mask;
4073 else
4074 target_flags &= ~mask;
4075 }
4076
4077 else if (type == ix86_opt_str)
4078 {
4079 if (p_strings[opt])
4080 {
4081 error ("option(\"%s\") was already specified", opt_string);
4082 ret = false;
4083 }
4084 else
4085 p_strings[opt] = xstrdup (p + opt_len);
4086 }
4087
4088 else
4089 gcc_unreachable ();
4090 }
4091
4092 return ret;
4093 }
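/* Illustrative sketch (not part of this file): the parser above accepts a
   comma-separated list mixing isa, "no-" prefixed, string and flag forms,
   for instance

       __attribute__ ((target ("sse4.2,no-avx,arch=core2,fpmath=sse")))
       int crc_loop (const char *buf, int len);

   the declaration is hypothetical; each item is matched against the
   attrs[] table above.  */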
4094
4095 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4096
4097 tree
4098 ix86_valid_target_attribute_tree (tree args)
4099 {
4100 const char *orig_arch_string = ix86_arch_string;
4101 const char *orig_tune_string = ix86_tune_string;
4102 const char *orig_fpmath_string = ix86_fpmath_string;
4103 int orig_tune_defaulted = ix86_tune_defaulted;
4104 int orig_arch_specified = ix86_arch_specified;
4105 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4106 tree t = NULL_TREE;
4107 int i;
4108 struct cl_target_option *def
4109 = TREE_TARGET_OPTION (target_option_default_node);
4110
4111 /* Process each of the options on the chain. */
4112 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4113 return NULL_TREE;
4114
4115 /* If the changed options are different from the default, rerun
4116 ix86_option_override_internal, and then save the options away.
4117 The string options are attribute options, and will be undone
4118 when we copy the save structure. */
4119 if (ix86_isa_flags != def->x_ix86_isa_flags
4120 || target_flags != def->x_target_flags
4121 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4122 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4123 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4124 {
4125 /* If we are using the default tune= or arch=, undo the string assigned,
4126 and use the default. */
4127 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4128 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4129 else if (!orig_arch_specified)
4130 ix86_arch_string = NULL;
4131
4132 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4133 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4134 else if (orig_tune_defaulted)
4135 ix86_tune_string = NULL;
4136
4137 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4138 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4139 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4140 else if (!TARGET_64BIT && TARGET_SSE)
4141 ix86_fpmath_string = "sse,387";
4142
4143 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4144 ix86_option_override_internal (false);
4145
4146 /* Add any builtin functions with the new isa if any. */
4147 ix86_add_new_builtins (ix86_isa_flags);
4148
4149 /* Save the current options unless we are validating options for
4150 #pragma. */
4151 t = build_target_option_node ();
4152
4153 ix86_arch_string = orig_arch_string;
4154 ix86_tune_string = orig_tune_string;
4155 ix86_fpmath_string = orig_fpmath_string;
4156
4157 /* Free up memory allocated to hold the strings */
4158 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4159 if (option_strings[i])
4160 free (option_strings[i]);
4161 }
4162
4163 return t;
4164 }
4165
4166 /* Hook to validate attribute((target("string"))). */
4167
4168 static bool
4169 ix86_valid_target_attribute_p (tree fndecl,
4170 tree ARG_UNUSED (name),
4171 tree args,
4172 int ARG_UNUSED (flags))
4173 {
4174 struct cl_target_option cur_target;
4175 bool ret = true;
4176 tree old_optimize = build_optimization_node ();
4177 tree new_target, new_optimize;
4178 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4179
4180 /* If the function changed the optimization levels as well as setting target
4181 options, start with the optimizations specified. */
4182 if (func_optimize && func_optimize != old_optimize)
4183 cl_optimization_restore (&global_options,
4184 TREE_OPTIMIZATION (func_optimize));
4185
4186 /* The target attributes may also change some optimization flags, so update
4187 the optimization options if necessary. */
4188 cl_target_option_save (&cur_target, &global_options);
4189 new_target = ix86_valid_target_attribute_tree (args);
4190 new_optimize = build_optimization_node ();
4191
4192 if (!new_target)
4193 ret = false;
4194
4195 else if (fndecl)
4196 {
4197 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4198
4199 if (old_optimize != new_optimize)
4200 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4201 }
4202
4203 cl_target_option_restore (&global_options, &cur_target);
4204
4205 if (old_optimize != new_optimize)
4206 cl_optimization_restore (&global_options,
4207 TREE_OPTIMIZATION (old_optimize));
4208
4209 return ret;
4210 }
4211
4212 \f
4213 /* Hook to determine if one function can safely inline another. */
4214
4215 static bool
4216 ix86_can_inline_p (tree caller, tree callee)
4217 {
4218 bool ret = false;
4219 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4220 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4221
4222 /* If callee has no option attributes, then it is ok to inline. */
4223 if (!callee_tree)
4224 ret = true;
4225
4226 /* If caller has no option attributes, but callee does then it is not ok to
4227 inline. */
4228 else if (!caller_tree)
4229 ret = false;
4230
4231 else
4232 {
4233 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4234 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4235
4236 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4237 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4238 function. */
4239 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4240 != callee_opts->x_ix86_isa_flags)
4241 ret = false;
4242
4243 /* See if we have the same non-isa options. */
4244 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4245 ret = false;
4246
4247 /* See if arch, tune, etc. are the same. */
4248 else if (caller_opts->arch != callee_opts->arch)
4249 ret = false;
4250
4251 else if (caller_opts->tune != callee_opts->tune)
4252 ret = false;
4253
4254 else if (caller_opts->fpmath != callee_opts->fpmath)
4255 ret = false;
4256
4257 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4258 ret = false;
4259
4260 else
4261 ret = true;
4262 }
4263
4264 return ret;
4265 }
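/* Illustrative sketch (not part of this file): the subset rule above means
   a caller built for a smaller ISA cannot inline a callee that needs more,
   e.g.

       static inline int __attribute__ ((target ("avx")))
       wide_op (int x) { return x + 1; }

       int caller (int x) { return wide_op (x); }   // caller lacks AVX

   here inlining is refused (with always_inline it would typically be
   reported as an error); the functions are hypothetical.  */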
4266
4267 \f
4268 /* Remember the last target of ix86_set_current_function. */
4269 static GTY(()) tree ix86_previous_fndecl;
4270
4271 /* Establish appropriate back-end context for processing the function
4272 FNDECL. The argument might be NULL to indicate processing at top
4273 level, outside of any function scope. */
4274 static void
4275 ix86_set_current_function (tree fndecl)
4276 {
4277 /* Only change the context if the function changes. This hook is called
4278 several times in the course of compiling a function, and we don't want to
4279 slow things down too much or call target_reinit when it isn't safe. */
4280 if (fndecl && fndecl != ix86_previous_fndecl)
4281 {
4282 tree old_tree = (ix86_previous_fndecl
4283 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4284 : NULL_TREE);
4285
4286 tree new_tree = (fndecl
4287 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4288 : NULL_TREE);
4289
4290 ix86_previous_fndecl = fndecl;
4291 if (old_tree == new_tree)
4292 ;
4293
4294 else if (new_tree)
4295 {
4296 cl_target_option_restore (&global_options,
4297 TREE_TARGET_OPTION (new_tree));
4298 target_reinit ();
4299 }
4300
4301 else if (old_tree)
4302 {
4303 struct cl_target_option *def
4304 = TREE_TARGET_OPTION (target_option_current_node);
4305
4306 cl_target_option_restore (&global_options, def);
4307 target_reinit ();
4308 }
4309 }
4310 }
4311
4312 \f
4313 /* Return true if this goes in large data/bss. */
4314
4315 static bool
4316 ix86_in_large_data_p (tree exp)
4317 {
4318 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4319 return false;
4320
4321 /* Functions are never large data. */
4322 if (TREE_CODE (exp) == FUNCTION_DECL)
4323 return false;
4324
4325 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4326 {
4327 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4328 if (strcmp (section, ".ldata") == 0
4329 || strcmp (section, ".lbss") == 0)
4330 return true;
4331 return false;
4332 }
4333 else
4334 {
4335 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4336
4337 /* If this is an incomplete type with size 0, then we can't put it
4338 in data because it might be too big when completed. */
4339 if (!size || size > ix86_section_threshold)
4340 return true;
4341 }
4342
4343 return false;
4344 }
4345
4346 /* Switch to the appropriate section for output of DECL.
4347 DECL is either a `VAR_DECL' node or a constant of some sort.
4348 RELOC indicates whether forming the initial value of DECL requires
4349 link-time relocations. */
4350
4351 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4352 ATTRIBUTE_UNUSED;
4353
4354 static section *
4355 x86_64_elf_select_section (tree decl, int reloc,
4356 unsigned HOST_WIDE_INT align)
4357 {
4358 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4359 && ix86_in_large_data_p (decl))
4360 {
4361 const char *sname = NULL;
4362 unsigned int flags = SECTION_WRITE;
4363 switch (categorize_decl_for_section (decl, reloc))
4364 {
4365 case SECCAT_DATA:
4366 sname = ".ldata";
4367 break;
4368 case SECCAT_DATA_REL:
4369 sname = ".ldata.rel";
4370 break;
4371 case SECCAT_DATA_REL_LOCAL:
4372 sname = ".ldata.rel.local";
4373 break;
4374 case SECCAT_DATA_REL_RO:
4375 sname = ".ldata.rel.ro";
4376 break;
4377 case SECCAT_DATA_REL_RO_LOCAL:
4378 sname = ".ldata.rel.ro.local";
4379 break;
4380 case SECCAT_BSS:
4381 sname = ".lbss";
4382 flags |= SECTION_BSS;
4383 break;
4384 case SECCAT_RODATA:
4385 case SECCAT_RODATA_MERGE_STR:
4386 case SECCAT_RODATA_MERGE_STR_INIT:
4387 case SECCAT_RODATA_MERGE_CONST:
4388 sname = ".lrodata";
4389 flags = 0;
4390 break;
4391 case SECCAT_SRODATA:
4392 case SECCAT_SDATA:
4393 case SECCAT_SBSS:
4394 gcc_unreachable ();
4395 case SECCAT_TEXT:
4396 case SECCAT_TDATA:
4397 case SECCAT_TBSS:
4398 /* We don't split these for the medium model. Place them into
4399 default sections and hope for the best. */
4400 break;
4401 }
4402 if (sname)
4403 {
4404 /* We might get called with string constants, but get_named_section
4405 doesn't like them as they are not DECLs. Also, we need to set
4406 flags in that case. */
4407 if (!DECL_P (decl))
4408 return get_section (sname, flags, NULL);
4409 return get_named_section (decl, sname, reloc);
4410 }
4411 }
4412 return default_elf_select_section (decl, reloc, align);
4413 }
4414
4415 /* Build up a unique section name, expressed as a
4416 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4417 RELOC indicates whether the initial value of EXP requires
4418 link-time relocations. */
4419
4420 static void ATTRIBUTE_UNUSED
4421 x86_64_elf_unique_section (tree decl, int reloc)
4422 {
4423 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4424 && ix86_in_large_data_p (decl))
4425 {
4426 const char *prefix = NULL;
4427 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4428 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4429
4430 switch (categorize_decl_for_section (decl, reloc))
4431 {
4432 case SECCAT_DATA:
4433 case SECCAT_DATA_REL:
4434 case SECCAT_DATA_REL_LOCAL:
4435 case SECCAT_DATA_REL_RO:
4436 case SECCAT_DATA_REL_RO_LOCAL:
4437 prefix = one_only ? ".ld" : ".ldata";
4438 break;
4439 case SECCAT_BSS:
4440 prefix = one_only ? ".lb" : ".lbss";
4441 break;
4442 case SECCAT_RODATA:
4443 case SECCAT_RODATA_MERGE_STR:
4444 case SECCAT_RODATA_MERGE_STR_INIT:
4445 case SECCAT_RODATA_MERGE_CONST:
4446 prefix = one_only ? ".lr" : ".lrodata";
4447 break;
4448 case SECCAT_SRODATA:
4449 case SECCAT_SDATA:
4450 case SECCAT_SBSS:
4451 gcc_unreachable ();
4452 case SECCAT_TEXT:
4453 case SECCAT_TDATA:
4454 case SECCAT_TBSS:
4455 /* We don't split these for the medium model. Place them into
4456 default sections and hope for the best. */
4457 break;
4458 }
4459 if (prefix)
4460 {
4461 const char *name, *linkonce;
4462 char *string;
4463
4464 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4465 name = targetm.strip_name_encoding (name);
4466
4467 /* If we're using one_only, then there needs to be a .gnu.linkonce
4468 prefix to the section name. */
4469 linkonce = one_only ? ".gnu.linkonce" : "";
4470
4471 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4472
4473 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4474 return;
4475 }
4476 }
4477 default_unique_section (decl, reloc);
4478 }
4479
4480 #ifdef COMMON_ASM_OP
4481 /* This says how to output assembler code to declare an
4482 uninitialized external linkage data object.
4483
4484 For medium model x86-64 we need to use the .largecomm directive for
4485 large objects. */
4486 void
4487 x86_elf_aligned_common (FILE *file,
4488 const char *name, unsigned HOST_WIDE_INT size,
4489 int align)
4490 {
4491 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4492 && size > (unsigned int)ix86_section_threshold)
4493 fputs (".largecomm\t", file);
4494 else
4495 fputs (COMMON_ASM_OP, file);
4496 assemble_name (file, name);
4497 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4498 size, align / BITS_PER_UNIT);
4499 }
4500 #endif
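/* Illustrative sketch (not part of this file): with
   "-mcmodel=medium -mlarge-data-threshold=65536" an uninitialized object
   such as

       char scratch_pool[1 << 20];

   exceeds the threshold and is announced with the .largecomm directive
   emitted above, while smaller objects keep the ordinary COMMON_ASM_OP
   spelling; the array name is hypothetical.  */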
4501
4502 /* Utility function for targets to use in implementing
4503 ASM_OUTPUT_ALIGNED_BSS. */
4504
4505 void
4506 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4507 const char *name, unsigned HOST_WIDE_INT size,
4508 int align)
4509 {
4510 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4511 && size > (unsigned int)ix86_section_threshold)
4512 switch_to_section (get_named_section (decl, ".lbss", 0));
4513 else
4514 switch_to_section (bss_section);
4515 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4516 #ifdef ASM_DECLARE_OBJECT_NAME
4517 last_assemble_variable_decl = decl;
4518 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4519 #else
4520 /* The standard thing is just to output a label for the object. */
4521 ASM_OUTPUT_LABEL (file, name);
4522 #endif /* ASM_DECLARE_OBJECT_NAME */
4523 ASM_OUTPUT_SKIP (file, size ? size : 1);
4524 }
4525 \f
4526 static void
4527 ix86_option_optimization (int level, int size ATTRIBUTE_UNUSED)
4528 {
4529 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4530 make the problem with not enough registers even worse. */
4531 #ifdef INSN_SCHEDULING
4532 if (level > 1)
4533 flag_schedule_insns = 0;
4534 #endif
4535
4536 if (TARGET_MACHO)
4537 /* The Darwin libraries never set errno, so we might as well
4538 avoid calling them when that's the only reason we would. */
4539 flag_errno_math = 0;
4540
4541 /* The default values of these switches depend on TARGET_64BIT,
4542 which is not known at this moment. Mark these values with 2 and
4543 let the user override them. In case there is no command line
4544 option specifying them, we will set the defaults in
4545 ix86_option_override_internal. */
4546 if (optimize >= 1)
4547 flag_omit_frame_pointer = 2;
4548
4549 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4550 if (level > 1)
4551 flag_zee = 2;
4552
4553 flag_pcc_struct_return = 2;
4554 flag_asynchronous_unwind_tables = 2;
4555 flag_vect_cost_model = 1;
4556 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4557 SUBTARGET_OPTIMIZATION_OPTIONS;
4558 #endif
4559 }
4560
4561 /* Decide whether we must probe the stack before any space allocation
4562 on this target. It's essentially TARGET_STACK_PROBE except when
4563 -fstack-check causes the stack to be already probed differently. */
4564
4565 bool
4566 ix86_target_stack_probe (void)
4567 {
4568 /* Do not probe the stack twice if static stack checking is enabled. */
4569 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4570 return false;
4571
4572 return TARGET_STACK_PROBE;
4573 }
4574 \f
4575 /* Decide whether we can make a sibling call to a function. DECL is the
4576 declaration of the function being targeted by the call and EXP is the
4577 CALL_EXPR representing the call. */
4578
4579 static bool
4580 ix86_function_ok_for_sibcall (tree decl, tree exp)
4581 {
4582 tree type, decl_or_type;
4583 rtx a, b;
4584
4585 /* If we are generating position-independent code, we cannot sibcall
4586 optimize any indirect call, or a direct call to a global function,
4587 as the PLT requires %ebx be live. */
4588 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4589 return false;
4590
4591 /* If we need to align the outgoing stack, then sibcalling would
4592 unalign the stack, which may break the called function. */
4593 if (ix86_minimum_incoming_stack_boundary (true)
4594 < PREFERRED_STACK_BOUNDARY)
4595 return false;
4596
4597 if (decl)
4598 {
4599 decl_or_type = decl;
4600 type = TREE_TYPE (decl);
4601 }
4602 else
4603 {
4604 /* We're looking at the CALL_EXPR, we need the type of the function. */
4605 type = CALL_EXPR_FN (exp); /* pointer expression */
4606 type = TREE_TYPE (type); /* pointer type */
4607 type = TREE_TYPE (type); /* function type */
4608 decl_or_type = type;
4609 }
4610
4611 /* Check that the return value locations are the same. Like
4612 if we are returning floats on the 80387 register stack, we cannot
4613 make a sibcall from a function that doesn't return a float to a
4614 function that does or, conversely, from a function that does return
4615 a float to a function that doesn't; the necessary stack adjustment
4616 would not be executed. This is also the place we notice
4617 differences in the return value ABI. Note that it is ok for one
4618 of the functions to have void return type as long as the return
4619 value of the other is passed in a register. */
4620 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4621 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4622 cfun->decl, false);
4623 if (STACK_REG_P (a) || STACK_REG_P (b))
4624 {
4625 if (!rtx_equal_p (a, b))
4626 return false;
4627 }
4628 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4629 ;
4630 else if (!rtx_equal_p (a, b))
4631 return false;
4632
4633 if (TARGET_64BIT)
4634 {
4635 /* The SYSV ABI has more call-clobbered registers;
4636 disallow sibcalls from MS to SYSV. */
4637 if (cfun->machine->call_abi == MS_ABI
4638 && ix86_function_type_abi (type) == SYSV_ABI)
4639 return false;
4640 }
4641 else
4642 {
4643 /* If this call is indirect, we'll need to be able to use a
4644 call-clobbered register for the address of the target function.
4645 Make sure that all such registers are not used for passing
4646 parameters. Note that DLLIMPORT functions are indirect. */
4647 if (!decl
4648 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4649 {
4650 if (ix86_function_regparm (type, NULL) >= 3)
4651 {
4652 /* ??? Need to count the actual number of registers to be used,
4653 not the possible number of registers. Fix later. */
4654 return false;
4655 }
4656 }
4657 }
4658
4659 /* Otherwise okay. That also includes certain types of indirect calls. */
4660 return true;
4661 }
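/* Illustrative sketch (not part of this file): a tail call such as

       extern int ext_fn (int);
       int wrapper (int x) { return ext_fn (x); }

   is normally a sibcall candidate, but with "-m32 -fpic" the call to the
   global ext_fn goes through the PLT and needs %ebx live, so the first
   check above rejects it; the functions are hypothetical.  */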
4662
4663 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4664 and "sseregparm" calling convention attributes;
4665 arguments as in struct attribute_spec.handler. */
4666
4667 static tree
4668 ix86_handle_cconv_attribute (tree *node, tree name,
4669 tree args,
4670 int flags ATTRIBUTE_UNUSED,
4671 bool *no_add_attrs)
4672 {
4673 if (TREE_CODE (*node) != FUNCTION_TYPE
4674 && TREE_CODE (*node) != METHOD_TYPE
4675 && TREE_CODE (*node) != FIELD_DECL
4676 && TREE_CODE (*node) != TYPE_DECL)
4677 {
4678 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4679 name);
4680 *no_add_attrs = true;
4681 return NULL_TREE;
4682 }
4683
4684 /* Can combine regparm with all attributes but fastcall. */
4685 if (is_attribute_p ("regparm", name))
4686 {
4687 tree cst;
4688
4689 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4690 {
4691 error ("fastcall and regparm attributes are not compatible");
4692 }
4693
4694 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4695 {
4696 error ("regparam and thiscall attributes are not compatible");
4697 }
4698
4699 cst = TREE_VALUE (args);
4700 if (TREE_CODE (cst) != INTEGER_CST)
4701 {
4702 warning (OPT_Wattributes,
4703 "%qE attribute requires an integer constant argument",
4704 name);
4705 *no_add_attrs = true;
4706 }
4707 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4708 {
4709 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4710 name, REGPARM_MAX);
4711 *no_add_attrs = true;
4712 }
4713
4714 return NULL_TREE;
4715 }
4716
4717 if (TARGET_64BIT)
4718 {
4719 /* Do not warn when emulating the MS ABI. */
4720 if ((TREE_CODE (*node) != FUNCTION_TYPE
4721 && TREE_CODE (*node) != METHOD_TYPE)
4722 || ix86_function_type_abi (*node) != MS_ABI)
4723 warning (OPT_Wattributes, "%qE attribute ignored",
4724 name);
4725 *no_add_attrs = true;
4726 return NULL_TREE;
4727 }
4728
4729 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4730 if (is_attribute_p ("fastcall", name))
4731 {
4732 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4733 {
4734 error ("fastcall and cdecl attributes are not compatible");
4735 }
4736 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4737 {
4738 error ("fastcall and stdcall attributes are not compatible");
4739 }
4740 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4741 {
4742 error ("fastcall and regparm attributes are not compatible");
4743 }
4744 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4745 {
4746 error ("fastcall and thiscall attributes are not compatible");
4747 }
4748 }
4749
4750 /* Can combine stdcall with fastcall (redundant), regparm and
4751 sseregparm. */
4752 else if (is_attribute_p ("stdcall", name))
4753 {
4754 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4755 {
4756 error ("stdcall and cdecl attributes are not compatible");
4757 }
4758 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4759 {
4760 error ("stdcall and fastcall attributes are not compatible");
4761 }
4762 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4763 {
4764 error ("stdcall and thiscall attributes are not compatible");
4765 }
4766 }
4767
4768 /* Can combine cdecl with regparm and sseregparm. */
4769 else if (is_attribute_p ("cdecl", name))
4770 {
4771 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4772 {
4773 error ("stdcall and cdecl attributes are not compatible");
4774 }
4775 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4776 {
4777 error ("fastcall and cdecl attributes are not compatible");
4778 }
4779 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4780 {
4781 error ("cdecl and thiscall attributes are not compatible");
4782 }
4783 }
4784 else if (is_attribute_p ("thiscall", name))
4785 {
4786 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4787 warning (OPT_Wattributes, "%qE attribute is used for none class-method",
4788 name);
4789 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4790 {
4791 error ("stdcall and thiscall attributes are not compatible");
4792 }
4793 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4794 {
4795 error ("fastcall and thiscall attributes are not compatible");
4796 }
4797 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4798 {
4799 error ("cdecl and thiscall attributes are not compatible");
4800 }
4801 }
4802
4803 /* Can combine sseregparm with all attributes. */
4804
4805 return NULL_TREE;
4806 }
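/* Illustrative sketch (not part of this file): the compatibility rules
   enforced above allow combinations such as

       int __attribute__ ((stdcall, regparm (2))) cb (int a, int b);

   while rejecting, for example, fastcall together with regparm or cdecl
   on the same type; the declaration is hypothetical.  */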
4807
4808 /* Return 0 if the attributes for two types are incompatible, 1 if they
4809 are compatible, and 2 if they are nearly compatible (which causes a
4810 warning to be generated). */
4811
4812 static int
4813 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4814 {
4815 /* Check for mismatch of non-default calling convention. */
4816 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4817
4818 if (TREE_CODE (type1) != FUNCTION_TYPE
4819 && TREE_CODE (type1) != METHOD_TYPE)
4820 return 1;
4821
4822 /* Check for mismatched fastcall/regparm types. */
4823 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4824 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4825 || (ix86_function_regparm (type1, NULL)
4826 != ix86_function_regparm (type2, NULL)))
4827 return 0;
4828
4829 /* Check for mismatched sseregparm types. */
4830 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4831 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4832 return 0;
4833
4834 /* Check for mismatched thiscall types. */
4835 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4836 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4837 return 0;
4838
4839 /* Check for mismatched return types (cdecl vs stdcall). */
4840 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4841 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4842 return 0;
4843
4844 return 1;
4845 }
4846 \f
4847 /* Return the regparm value for a function with the indicated TYPE and DECL.
4848 DECL may be NULL when calling function indirectly
4849 or considering a libcall. */
4850
4851 static int
4852 ix86_function_regparm (const_tree type, const_tree decl)
4853 {
4854 tree attr;
4855 int regparm;
4856
4857 if (TARGET_64BIT)
4858 return (ix86_function_type_abi (type) == SYSV_ABI
4859 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4860
4861 regparm = ix86_regparm;
4862 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4863 if (attr)
4864 {
4865 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4866 return regparm;
4867 }
4868
4869 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4870 return 2;
4871
4872 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4873 return 1;
4874
4875 /* Use register calling convention for local functions when possible. */
4876 if (decl
4877 && TREE_CODE (decl) == FUNCTION_DECL
4878 && optimize
4879 && !(profile_flag && !flag_fentry))
4880 {
4881 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4882 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4883 if (i && i->local)
4884 {
4885 int local_regparm, globals = 0, regno;
4886
4887 /* Make sure no regparm register is taken by a
4888 fixed register variable. */
4889 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4890 if (fixed_regs[local_regparm])
4891 break;
4892
4893 /* We don't want to use regparm(3) for nested functions as
4894 these use a static chain pointer in the third argument. */
4895 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4896 local_regparm = 2;
4897
4898 /* In 32-bit mode save a register for the split stack. */
4899 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
4900 local_regparm = 2;
4901
4902 /* Each fixed register usage increases register pressure,
4903 so fewer registers should be used for argument passing.
4904 This functionality can be overridden by an explicit
4905 regparm value. */
4906 for (regno = 0; regno <= DI_REG; regno++)
4907 if (fixed_regs[regno])
4908 globals++;
4909
4910 local_regparm
4911 = globals < local_regparm ? local_regparm - globals : 0;
4912
4913 if (local_regparm > regparm)
4914 regparm = local_regparm;
4915 }
4916 }
4917
4918 return regparm;
4919 }
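
/* A sketch of the resulting values (hypothetical prototypes):

     int __attribute__ ((regparm (3))) f (int a, int b, int c);

   yields 3, so A, B and C travel in EAX, ECX and EDX, while

     int __attribute__ ((fastcall)) g (int a, int b);

   yields 2 (ECX and EDX).  Local static functions compiled with
   optimization may additionally be promoted to register passing by the
   cgraph-based logic above.  */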
4920
4921 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4922 DFmode (2) arguments in SSE registers for a function with the
4923 indicated TYPE and DECL. DECL may be NULL when calling function
4924 indirectly or considering a libcall. Otherwise return 0. */
4925
4926 static int
4927 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4928 {
4929 gcc_assert (!TARGET_64BIT);
4930
4931 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4932 by the sseregparm attribute. */
4933 if (TARGET_SSEREGPARM
4934 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4935 {
4936 if (!TARGET_SSE)
4937 {
4938 if (warn)
4939 {
4940 if (decl)
4941 error ("Calling %qD with attribute sseregparm without "
4942 "SSE/SSE2 enabled", decl);
4943 else
4944 error ("Calling %qT with attribute sseregparm without "
4945 "SSE/SSE2 enabled", type);
4946 }
4947 return 0;
4948 }
4949
4950 return 2;
4951 }
4952
4953 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4954 (and DFmode for SSE2) arguments in SSE registers. */
4955 if (decl && TARGET_SSE_MATH && optimize
4956 && !(profile_flag && !flag_fentry))
4957 {
4958 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4959 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4960 if (i && i->local)
4961 return TARGET_SSE2 ? 2 : 1;
4962 }
4963
4964 return 0;
4965 }
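
/* For illustration (assumed flags and prototype, not exercised here):
   compiling with -msse, a declaration such as

     double __attribute__ ((sseregparm)) f (double x, float y);

   makes this function return 2, so both the SFmode and DFmode arguments
   travel in SSE registers; without SSE enabled the attribute is
   diagnosed and 0 is returned, falling back to the default 32-bit
   convention.  */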
4966
4967 /* Return true if EAX is live at the start of the function. Used by
4968 ix86_expand_prologue to determine if we need special help before
4969 calling allocate_stack_worker. */
4970
4971 static bool
4972 ix86_eax_live_at_start_p (void)
4973 {
4974 /* Cheat. Don't bother working forward from ix86_function_regparm
4975 to the function type to whether an actual argument is located in
4976 eax. Instead just look at cfg info, which is still close enough
4977 to correct at this point. This gives false positives for broken
4978 functions that might use uninitialized data that happens to be
4979 allocated in eax, but who cares? */
4980 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4981 }
4982
4983 /* Value is the number of bytes of arguments automatically
4984 popped when returning from a subroutine call.
4985 FUNDECL is the declaration node of the function (as a tree),
4986 FUNTYPE is the data type of the function (as a tree),
4987 or for a library call it is an identifier node for the subroutine name.
4988 SIZE is the number of bytes of arguments passed on the stack.
4989
4990 On the 80386, the RTD insn may be used to pop them if the number
4991 of args is fixed, but if the number is variable then the caller
4992 must pop them all. RTD can't be used for library calls now
4993 because the library is compiled with the Unix compiler.
4994 Use of RTD is a selectable option, since it is incompatible with
4995 standard Unix calling sequences. If the option is not selected,
4996 the caller must always pop the args.
4997
4998 The attribute stdcall is equivalent to RTD on a per module basis. */
4999
5000 static int
5001 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5002 {
5003 int rtd;
5004
5005 /* None of the 64-bit ABIs pop arguments. */
5006 if (TARGET_64BIT)
5007 return 0;
5008
5009 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5010
5011 /* Cdecl functions override -mrtd, and never pop the stack. */
5012 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
5013 {
5014 /* Stdcall and fastcall functions will pop the stack if not
5015 variable args. */
5016 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5017 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5018 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5019 rtd = 1;
5020
5021 if (rtd && ! stdarg_p (funtype))
5022 return size;
5023 }
5024
5025 /* Lose any fake structure return argument if it is passed on the stack. */
5026 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5027 && !KEEP_AGGREGATE_RETURN_POINTER)
5028 {
5029 int nregs = ix86_function_regparm (funtype, fundecl);
5030 if (nregs == 0)
5031 return GET_MODE_SIZE (Pmode);
5032 }
5033
5034 return 0;
5035 }
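
/* A sketch of the resulting behaviour (example prototype only): for

     int __attribute__ ((stdcall)) f (int a, int b);

   SIZE is 8 and the function is not variadic, so 8 is returned and the
   callee pops its own arguments (a "ret 8").  A cdecl or variadic
   function returns 0 here and the caller cleans up the stack.  */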
5036 \f
5037 /* Argument support functions. */
5038
5039 /* Return true when register may be used to pass function parameters. */
5040 bool
5041 ix86_function_arg_regno_p (int regno)
5042 {
5043 int i;
5044 const int *parm_regs;
5045
5046 if (!TARGET_64BIT)
5047 {
5048 if (TARGET_MACHO)
5049 return (regno < REGPARM_MAX
5050 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5051 else
5052 return (regno < REGPARM_MAX
5053 || (TARGET_MMX && MMX_REGNO_P (regno)
5054 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5055 || (TARGET_SSE && SSE_REGNO_P (regno)
5056 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5057 }
5058
5059 if (TARGET_MACHO)
5060 {
5061 if (SSE_REGNO_P (regno) && TARGET_SSE)
5062 return true;
5063 }
5064 else
5065 {
5066 if (TARGET_SSE && SSE_REGNO_P (regno)
5067 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5068 return true;
5069 }
5070
5071 /* TODO: The function should depend on current function ABI but
5072 builtins.c would need updating then. Therefore we use the
5073 default ABI. */
5074
5075 /* RAX is used as hidden argument to va_arg functions. */
5076 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5077 return true;
5078
5079 if (ix86_abi == MS_ABI)
5080 parm_regs = x86_64_ms_abi_int_parameter_registers;
5081 else
5082 parm_regs = x86_64_int_parameter_registers;
5083 for (i = 0; i < (ix86_abi == MS_ABI
5084 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5085 if (regno == parm_regs[i])
5086 return true;
5087 return false;
5088 }
5089
5090 /* Return if we do not know how to pass TYPE solely in registers. */
5091
5092 static bool
5093 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5094 {
5095 if (must_pass_in_stack_var_size_or_pad (mode, type))
5096 return true;
5097
5098 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5099 The layout_type routine is crafty and tries to trick us into passing
5100 currently unsupported vector types on the stack by using TImode. */
5101 return (!TARGET_64BIT && mode == TImode
5102 && type && TREE_CODE (type) != VECTOR_TYPE);
5103 }
5104
5105 /* Return the size, in bytes, of the area reserved for arguments passed
5106 in registers for the function represented by FNDECL, depending on the
5107 ABI format used. */
5108 int
5109 ix86_reg_parm_stack_space (const_tree fndecl)
5110 {
5111 enum calling_abi call_abi = SYSV_ABI;
5112 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5113 call_abi = ix86_function_abi (fndecl);
5114 else
5115 call_abi = ix86_function_type_abi (fndecl);
5116 if (call_abi == MS_ABI)
5117 return 32;
5118 return 0;
5119 }
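
/* In other words, a 64-bit callee declared along the lines of

     void __attribute__ ((ms_abi)) f (int a, int b);

   (an illustrative prototype) has the 32-byte register-parameter
   "shadow" area reserved by its callers, while SYSV_ABI functions
   reserve nothing.  */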
5120
5121 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5122 call ABI used. */
5123 enum calling_abi
5124 ix86_function_type_abi (const_tree fntype)
5125 {
5126 if (TARGET_64BIT && fntype != NULL)
5127 {
5128 enum calling_abi abi = ix86_abi;
5129 if (abi == SYSV_ABI)
5130 {
5131 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5132 abi = MS_ABI;
5133 }
5134 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5135 abi = SYSV_ABI;
5136 return abi;
5137 }
5138 return ix86_abi;
5139 }
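
/* For example (hypothetical declarations), when the default 64-bit ABI
   is SYSV_ABI:

     void f (void);                           -- SYSV_ABI
     void __attribute__ ((ms_abi)) g (void);  -- MS_ABI

   and symmetrically a sysv_abi attribute overrides a default of MS_ABI,
   e.g. when targeting mingw64.  */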
5140
5141 static bool
5142 ix86_function_ms_hook_prologue (const_tree fn)
5143 {
5144 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5145 {
5146 if (decl_function_context (fn) != NULL_TREE)
5147 error_at (DECL_SOURCE_LOCATION (fn),
5148 "ms_hook_prologue is not compatible with nested function");
5149 else
5150 return true;
5151 }
5152 return false;
5153 }
5154
5155 static enum calling_abi
5156 ix86_function_abi (const_tree fndecl)
5157 {
5158 if (! fndecl)
5159 return ix86_abi;
5160 return ix86_function_type_abi (TREE_TYPE (fndecl));
5161 }
5162
5163 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
5164 call ABI used. */
5165 enum calling_abi
5166 ix86_cfun_abi (void)
5167 {
5168 if (! cfun || ! TARGET_64BIT)
5169 return ix86_abi;
5170 return cfun->machine->call_abi;
5171 }
5172
5173 /* Write the extra assembler code needed to declare a function properly. */
5174
5175 void
5176 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5177 tree decl)
5178 {
5179 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5180
5181 if (is_ms_hook)
5182 {
5183 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5184 unsigned int filler_cc = 0xcccccccc;
5185
5186 for (i = 0; i < filler_count; i += 4)
5187 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5188 }
5189
5190 ASM_OUTPUT_LABEL (asm_out_file, fname);
5191
5192 /* Output magic byte marker, if hot-patch attribute is set. */
5193 if (is_ms_hook)
5194 {
5195 if (TARGET_64BIT)
5196 {
5197 /* leaq [%rsp + 0], %rsp */
5198 asm_fprintf (asm_out_file, ASM_BYTE
5199 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5200 }
5201 else
5202 {
5203 /* movl.s %edi, %edi
5204 push %ebp
5205 movl.s %esp, %ebp */
5206 asm_fprintf (asm_out_file, ASM_BYTE
5207 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5208 }
5209 }
5210 }
5211
5212 /* regclass.c */
5213 extern void init_regs (void);
5214
5215 /* Implementation of the call ABI switching target hook. The call
5216 register sets specific to FNDECL are set up. See also
5217 CONDITIONAL_REGISTER_USAGE for more details. */
5218 void
5219 ix86_call_abi_override (const_tree fndecl)
5220 {
5221 if (fndecl == NULL_TREE)
5222 cfun->machine->call_abi = ix86_abi;
5223 else
5224 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5225 }
5226
5227 /* The MS and SYSV ABIs have different sets of call-used registers. Avoid
5228 expensive re-initialization of init_regs each time we switch function
5229 context, since this is needed only during RTL expansion. */
5230 static void
5231 ix86_maybe_switch_abi (void)
5232 {
5233 if (TARGET_64BIT &&
5234 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5235 reinit_regs ();
5236 }
5237
5238 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5239 for a call to a function whose data type is FNTYPE.
5240 For a library call, FNTYPE is 0. */
5241
5242 void
5243 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5244 tree fntype, /* tree ptr for function decl */
5245 rtx libname, /* SYMBOL_REF of library name or 0 */
5246 tree fndecl)
5247 {
5248 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5249 memset (cum, 0, sizeof (*cum));
5250
5251 if (fndecl)
5252 cum->call_abi = ix86_function_abi (fndecl);
5253 else
5254 cum->call_abi = ix86_function_type_abi (fntype);
5255 /* Set up the number of registers to use for passing arguments. */
5256
5257 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5258 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5259 "or subtarget optimization implying it");
5260 cum->nregs = ix86_regparm;
5261 if (TARGET_64BIT)
5262 {
5263 cum->nregs = (cum->call_abi == SYSV_ABI
5264 ? X86_64_REGPARM_MAX
5265 : X86_64_MS_REGPARM_MAX);
5266 }
5267 if (TARGET_SSE)
5268 {
5269 cum->sse_nregs = SSE_REGPARM_MAX;
5270 if (TARGET_64BIT)
5271 {
5272 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5273 ? X86_64_SSE_REGPARM_MAX
5274 : X86_64_MS_SSE_REGPARM_MAX);
5275 }
5276 }
5277 if (TARGET_MMX)
5278 cum->mmx_nregs = MMX_REGPARM_MAX;
5279 cum->warn_avx = true;
5280 cum->warn_sse = true;
5281 cum->warn_mmx = true;
5282
5283 /* Because the type might mismatch between caller and callee, we need to
5284 use the actual type of the function for local calls.
5285 FIXME: cgraph_analyze can be told to actually record if a function uses
5286 va_start, so for local functions maybe_vaarg can be made more aggressive,
5287 helping K&R code.
5288 FIXME: once the type system is fixed, we won't need this code anymore. */
5289 if (i && i->local)
5290 fntype = TREE_TYPE (fndecl);
5291 cum->maybe_vaarg = (fntype
5292 ? (!prototype_p (fntype) || stdarg_p (fntype))
5293 : !libname);
5294
5295 if (!TARGET_64BIT)
5296 {
5297 /* If there are variable arguments, then we won't pass anything
5298 in registers in 32-bit mode. */
5299 if (stdarg_p (fntype))
5300 {
5301 cum->nregs = 0;
5302 cum->sse_nregs = 0;
5303 cum->mmx_nregs = 0;
5304 cum->warn_avx = 0;
5305 cum->warn_sse = 0;
5306 cum->warn_mmx = 0;
5307 return;
5308 }
5309
5310 /* Use ecx and edx registers if function has fastcall attribute,
5311 else look for regparm information. */
5312 if (fntype)
5313 {
5314 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5315 {
5316 cum->nregs = 1;
5317 cum->fastcall = 1; /* Same first register as in fastcall. */
5318 }
5319 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5320 {
5321 cum->nregs = 2;
5322 cum->fastcall = 1;
5323 }
5324 else
5325 cum->nregs = ix86_function_regparm (fntype, fndecl);
5326 }
5327
5328 /* Set up the number of SSE registers used for passing SFmode
5329 and DFmode arguments. Warn for mismatching ABI. */
5330 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5331 }
5332 }
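
/* A worked example of the 32-bit setup above (illustrative prototype):
   for

     int __attribute__ ((fastcall)) f (int a, int b, int c);

   cum->nregs becomes 2 and cum->fastcall is set, so A and B end up in
   ECX and EDX and C goes on the stack; a variadic prototype would
   instead clear all of the register counts.  */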
5333
5334 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5335 But in the case of vector types, it is some vector mode.
5336
5337 When we have only some of our vector isa extensions enabled, then there
5338 are some modes for which vector_mode_supported_p is false. For these
5339 modes, the generic vector support in gcc will choose some non-vector mode
5340 in order to implement the type. By computing the natural mode, we'll
5341 select the proper ABI location for the operand and not depend on whatever
5342 the middle-end decides to do with these vector types.
5343
5344 The middle-end can't deal with vector types > 16 bytes. In this
5345 case, we return the original mode and warn about the ABI change if
5346 CUM isn't NULL. */
5347
5348 static enum machine_mode
5349 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5350 {
5351 enum machine_mode mode = TYPE_MODE (type);
5352
5353 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5354 {
5355 HOST_WIDE_INT size = int_size_in_bytes (type);
5356 if ((size == 8 || size == 16 || size == 32)
5357 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5358 && TYPE_VECTOR_SUBPARTS (type) > 1)
5359 {
5360 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5361
5362 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5363 mode = MIN_MODE_VECTOR_FLOAT;
5364 else
5365 mode = MIN_MODE_VECTOR_INT;
5366
5367 /* Get the mode which has this inner mode and number of units. */
5368 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5369 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5370 && GET_MODE_INNER (mode) == innermode)
5371 {
5372 if (size == 32 && !TARGET_AVX)
5373 {
5374 static bool warnedavx;
5375
5376 if (cum
5377 && !warnedavx
5378 && cum->warn_avx)
5379 {
5380 warnedavx = true;
5381 warning (0, "AVX vector argument without AVX "
5382 "enabled changes the ABI");
5383 }
5384 return TYPE_MODE (type);
5385 }
5386 else
5387 return mode;
5388 }
5389
5390 gcc_unreachable ();
5391 }
5392 }
5393
5394 return mode;
5395 }
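
/* For instance (a generic vector typedef, not used in this file):

     typedef float v4sf __attribute__ ((vector_size (16)));

   compiled without -msse has a TYPE_MODE of BLKmode, yet its natural
   mode as computed above is V4SFmode, so the ABI location of a v4sf
   argument does not depend on which ISA extensions happen to be
   enabled.  */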
5396
5397 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5398 this may not agree with the mode that the type system has chosen for the
5399 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5400 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5401
5402 static rtx
5403 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5404 unsigned int regno)
5405 {
5406 rtx tmp;
5407
5408 if (orig_mode != BLKmode)
5409 tmp = gen_rtx_REG (orig_mode, regno);
5410 else
5411 {
5412 tmp = gen_rtx_REG (mode, regno);
5413 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5414 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5415 }
5416
5417 return tmp;
5418 }
5419
5420 /* x86-64 register passing implementation. See the x86-64 ABI for details.
5421 The goal of this code is to classify each 8-byte chunk of an incoming
5422 argument by register class and assign registers accordingly. */
5423
5424 /* Return the union class of CLASS1 and CLASS2.
5425 See the x86-64 PS ABI for details. */
5426
5427 static enum x86_64_reg_class
5428 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5429 {
5430 /* Rule #1: If both classes are equal, this is the resulting class. */
5431 if (class1 == class2)
5432 return class1;
5433
5434 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5435 the other class. */
5436 if (class1 == X86_64_NO_CLASS)
5437 return class2;
5438 if (class2 == X86_64_NO_CLASS)
5439 return class1;
5440
5441 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5442 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5443 return X86_64_MEMORY_CLASS;
5444
5445 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5446 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5447 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5448 return X86_64_INTEGERSI_CLASS;
5449 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5450 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5451 return X86_64_INTEGER_CLASS;
5452
5453 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5454 MEMORY is used. */
5455 if (class1 == X86_64_X87_CLASS
5456 || class1 == X86_64_X87UP_CLASS
5457 || class1 == X86_64_COMPLEX_X87_CLASS
5458 || class2 == X86_64_X87_CLASS
5459 || class2 == X86_64_X87UP_CLASS
5460 || class2 == X86_64_COMPLEX_X87_CLASS)
5461 return X86_64_MEMORY_CLASS;
5462
5463 /* Rule #6: Otherwise class SSE is used. */
5464 return X86_64_SSE_CLASS;
5465 }
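
/* A small worked example of these rules (hypothetical type, not from
   this file): for

     struct s { int i; float f; double d; };

   the first eightbyte holds I (INTEGERSI) and F (SSE); rule #4 merges
   them to X86_64_INTEGER_CLASS, so that eightbyte goes in a
   general-purpose register.  The second eightbyte holds only D and
   stays X86_64_SSEDF_CLASS, i.e. an SSE register.  */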
5466
5467 /* Classify the argument of type TYPE and mode MODE.
5468 CLASSES will be filled by the register class used to pass each word
5469 of the operand. The number of words is returned. In case the parameter
5470 should be passed in memory, 0 is returned. As a special case for zero
5471 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5472
5473 BIT_OFFSET is used internally for handling records and specifies the
5474 offset in bits modulo 256 to avoid overflow cases.
5475
5476 See the x86-64 PS ABI for details.
5477 */
5478
5479 static int
5480 classify_argument (enum machine_mode mode, const_tree type,
5481 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5482 {
5483 HOST_WIDE_INT bytes =
5484 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5485 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5486
5487 /* Variable sized entities are always passed/returned in memory. */
5488 if (bytes < 0)
5489 return 0;
5490
5491 if (mode != VOIDmode
5492 && targetm.calls.must_pass_in_stack (mode, type))
5493 return 0;
5494
5495 if (type && AGGREGATE_TYPE_P (type))
5496 {
5497 int i;
5498 tree field;
5499 enum x86_64_reg_class subclasses[MAX_CLASSES];
5500
5501 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5502 if (bytes > 32)
5503 return 0;
5504
5505 for (i = 0; i < words; i++)
5506 classes[i] = X86_64_NO_CLASS;
5507
5508 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5509 signal the memory class, so handle this as a special case. */
5510 if (!words)
5511 {
5512 classes[0] = X86_64_NO_CLASS;
5513 return 1;
5514 }
5515
5516 /* Classify each field of record and merge classes. */
5517 switch (TREE_CODE (type))
5518 {
5519 case RECORD_TYPE:
5520 /* And now merge the fields of structure. */
5521 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5522 {
5523 if (TREE_CODE (field) == FIELD_DECL)
5524 {
5525 int num;
5526
5527 if (TREE_TYPE (field) == error_mark_node)
5528 continue;
5529
5530 /* Bitfields are always classified as integer. Handle them
5531 early, since later code would consider them to be
5532 misaligned integers. */
5533 if (DECL_BIT_FIELD (field))
5534 {
5535 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5536 i < ((int_bit_position (field) + (bit_offset % 64))
5537 + tree_low_cst (DECL_SIZE (field), 0)
5538 + 63) / 8 / 8; i++)
5539 classes[i] =
5540 merge_classes (X86_64_INTEGER_CLASS,
5541 classes[i]);
5542 }
5543 else
5544 {
5545 int pos;
5546
5547 type = TREE_TYPE (field);
5548
5549 /* Flexible array member is ignored. */
5550 if (TYPE_MODE (type) == BLKmode
5551 && TREE_CODE (type) == ARRAY_TYPE
5552 && TYPE_SIZE (type) == NULL_TREE
5553 && TYPE_DOMAIN (type) != NULL_TREE
5554 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5555 == NULL_TREE))
5556 {
5557 static bool warned;
5558
5559 if (!warned && warn_psabi)
5560 {
5561 warned = true;
5562 inform (input_location,
5563 "The ABI of passing struct with"
5564 " a flexible array member has"
5565 " changed in GCC 4.4");
5566 }
5567 continue;
5568 }
5569 num = classify_argument (TYPE_MODE (type), type,
5570 subclasses,
5571 (int_bit_position (field)
5572 + bit_offset) % 256);
5573 if (!num)
5574 return 0;
5575 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5576 for (i = 0; i < num && (i + pos) < words; i++)
5577 classes[i + pos] =
5578 merge_classes (subclasses[i], classes[i + pos]);
5579 }
5580 }
5581 }
5582 break;
5583
5584 case ARRAY_TYPE:
5585 /* Arrays are handled as small records. */
5586 {
5587 int num;
5588 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5589 TREE_TYPE (type), subclasses, bit_offset);
5590 if (!num)
5591 return 0;
5592
5593 /* The partial classes are now full classes. */
5594 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5595 subclasses[0] = X86_64_SSE_CLASS;
5596 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5597 && !((bit_offset % 64) == 0 && bytes == 4))
5598 subclasses[0] = X86_64_INTEGER_CLASS;
5599
5600 for (i = 0; i < words; i++)
5601 classes[i] = subclasses[i % num];
5602
5603 break;
5604 }
5605 case UNION_TYPE:
5606 case QUAL_UNION_TYPE:
5607 /* Unions are similar to RECORD_TYPE but offset is always 0. */
5609 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5610 {
5611 if (TREE_CODE (field) == FIELD_DECL)
5612 {
5613 int num;
5614
5615 if (TREE_TYPE (field) == error_mark_node)
5616 continue;
5617
5618 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5619 TREE_TYPE (field), subclasses,
5620 bit_offset);
5621 if (!num)
5622 return 0;
5623 for (i = 0; i < num; i++)
5624 classes[i] = merge_classes (subclasses[i], classes[i]);
5625 }
5626 }
5627 break;
5628
5629 default:
5630 gcc_unreachable ();
5631 }
5632
5633 if (words > 2)
5634 {
5635 /* When the size exceeds 16 bytes, everything should be passed
5636 in memory unless the first class is X86_64_SSE_CLASS and all
5637 the remaining classes are X86_64_SSEUP_CLASS (i.e. the whole
5638 argument fits in SSE registers). */
5639 if (classes[0] != X86_64_SSE_CLASS)
5640 return 0;
5641
5642 for (i = 1; i < words; i++)
5643 if (classes[i] != X86_64_SSEUP_CLASS)
5644 return 0;
5645 }
5646
5647 /* Final merger cleanup. */
5648 for (i = 0; i < words; i++)
5649 {
5650 /* If one class is MEMORY, everything should be passed in
5651 memory. */
5652 if (classes[i] == X86_64_MEMORY_CLASS)
5653 return 0;
5654
5655 /* The X86_64_SSEUP_CLASS should always be preceded by
5656 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5657 if (classes[i] == X86_64_SSEUP_CLASS
5658 && classes[i - 1] != X86_64_SSE_CLASS
5659 && classes[i - 1] != X86_64_SSEUP_CLASS)
5660 {
5661 /* The first one should never be X86_64_SSEUP_CLASS. */
5662 gcc_assert (i != 0);
5663 classes[i] = X86_64_SSE_CLASS;
5664 }
5665
5666 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5667 everything should be passed in memory. */
5668 if (classes[i] == X86_64_X87UP_CLASS
5669 && (classes[i - 1] != X86_64_X87_CLASS))
5670 {
5671 static bool warned;
5672
5673 /* The first one should never be X86_64_X87UP_CLASS. */
5674 gcc_assert (i != 0);
5675 if (!warned && warn_psabi)
5676 {
5677 warned = true;
5678 inform (input_location,
5679 "The ABI of passing union with long double"
5680 " has changed in GCC 4.4");
5681 }
5682 return 0;
5683 }
5684 }
5685 return words;
5686 }
5687
5688 /* Compute the alignment needed. We align all types to their natural
5689 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5690 if (mode != VOIDmode && mode != BLKmode)
5691 {
5692 int mode_alignment = GET_MODE_BITSIZE (mode);
5693
5694 if (mode == XFmode)
5695 mode_alignment = 128;
5696 else if (mode == XCmode)
5697 mode_alignment = 256;
5698 if (COMPLEX_MODE_P (mode))
5699 mode_alignment /= 2;
5700 /* Misaligned fields are always returned in memory. */
5701 if (bit_offset % mode_alignment)
5702 return 0;
5703 }
5704
5705 /* For V1xx modes, just use the base mode. */
5706 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5707 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5708 mode = GET_MODE_INNER (mode);
5709
5710 /* Classification of atomic types. */
5711 switch (mode)
5712 {
5713 case SDmode:
5714 case DDmode:
5715 classes[0] = X86_64_SSE_CLASS;
5716 return 1;
5717 case TDmode:
5718 classes[0] = X86_64_SSE_CLASS;
5719 classes[1] = X86_64_SSEUP_CLASS;
5720 return 2;
5721 case DImode:
5722 case SImode:
5723 case HImode:
5724 case QImode:
5725 case CSImode:
5726 case CHImode:
5727 case CQImode:
5728 {
5729 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5730
5731 if (size <= 32)
5732 {
5733 classes[0] = X86_64_INTEGERSI_CLASS;
5734 return 1;
5735 }
5736 else if (size <= 64)
5737 {
5738 classes[0] = X86_64_INTEGER_CLASS;
5739 return 1;
5740 }
5741 else if (size <= 64+32)
5742 {
5743 classes[0] = X86_64_INTEGER_CLASS;
5744 classes[1] = X86_64_INTEGERSI_CLASS;
5745 return 2;
5746 }
5747 else if (size <= 64+64)
5748 {
5749 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5750 return 2;
5751 }
5752 else
5753 gcc_unreachable ();
5754 }
5755 case CDImode:
5756 case TImode:
5757 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5758 return 2;
5759 case COImode:
5760 case OImode:
5761 /* OImode shouldn't be used directly. */
5762 gcc_unreachable ();
5763 case CTImode:
5764 return 0;
5765 case SFmode:
5766 if (!(bit_offset % 64))
5767 classes[0] = X86_64_SSESF_CLASS;
5768 else
5769 classes[0] = X86_64_SSE_CLASS;
5770 return 1;
5771 case DFmode:
5772 classes[0] = X86_64_SSEDF_CLASS;
5773 return 1;
5774 case XFmode:
5775 classes[0] = X86_64_X87_CLASS;
5776 classes[1] = X86_64_X87UP_CLASS;
5777 return 2;
5778 case TFmode:
5779 classes[0] = X86_64_SSE_CLASS;
5780 classes[1] = X86_64_SSEUP_CLASS;
5781 return 2;
5782 case SCmode:
5783 classes[0] = X86_64_SSE_CLASS;
5784 if (!(bit_offset % 64))
5785 return 1;
5786 else
5787 {
5788 static bool warned;
5789
5790 if (!warned && warn_psabi)
5791 {
5792 warned = true;
5793 inform (input_location,
5794 "The ABI of passing structure with complex float"
5795 " member has changed in GCC 4.4");
5796 }
5797 classes[1] = X86_64_SSESF_CLASS;
5798 return 2;
5799 }
5800 case DCmode:
5801 classes[0] = X86_64_SSEDF_CLASS;
5802 classes[1] = X86_64_SSEDF_CLASS;
5803 return 2;
5804 case XCmode:
5805 classes[0] = X86_64_COMPLEX_X87_CLASS;
5806 return 1;
5807 case TCmode:
5808 /* This mode is larger than 16 bytes. */
5809 return 0;
5810 case V8SFmode:
5811 case V8SImode:
5812 case V32QImode:
5813 case V16HImode:
5814 case V4DFmode:
5815 case V4DImode:
5816 classes[0] = X86_64_SSE_CLASS;
5817 classes[1] = X86_64_SSEUP_CLASS;
5818 classes[2] = X86_64_SSEUP_CLASS;
5819 classes[3] = X86_64_SSEUP_CLASS;
5820 return 4;
5821 case V4SFmode:
5822 case V4SImode:
5823 case V16QImode:
5824 case V8HImode:
5825 case V2DFmode:
5826 case V2DImode:
5827 classes[0] = X86_64_SSE_CLASS;
5828 classes[1] = X86_64_SSEUP_CLASS;
5829 return 2;
5830 case V1TImode:
5831 case V1DImode:
5832 case V2SFmode:
5833 case V2SImode:
5834 case V4HImode:
5835 case V8QImode:
5836 classes[0] = X86_64_SSE_CLASS;
5837 return 1;
5838 case BLKmode:
5839 case VOIDmode:
5840 return 0;
5841 default:
5842 gcc_assert (VECTOR_MODE_P (mode));
5843
5844 if (bytes > 16)
5845 return 0;
5846
5847 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5848
5849 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5850 classes[0] = X86_64_INTEGERSI_CLASS;
5851 else
5852 classes[0] = X86_64_INTEGER_CLASS;
5853 classes[1] = X86_64_INTEGER_CLASS;
5854 return 1 + (bytes > 8);
5855 }
5856 }
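
/* To make the classification concrete, consider a hypothetical

     struct s { int a; int b; double d; };

   which is 16 bytes: the first eightbyte (A and B) classifies as
   X86_64_INTEGER_CLASS and the second (D) as X86_64_SSEDF_CLASS, so the
   struct is passed in one general-purpose and one SSE register.
   Aggregates larger than 32 bytes, or ones whose classes merge to
   X86_64_MEMORY_CLASS, make this function return 0 and go on the
   stack.  */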
5857
5858 /* Examine the argument and set the number of registers required in each
5859 class. Return 0 iff the parameter should be passed in memory. */
5860 static int
5861 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5862 int *int_nregs, int *sse_nregs)
5863 {
5864 enum x86_64_reg_class regclass[MAX_CLASSES];
5865 int n = classify_argument (mode, type, regclass, 0);
5866
5867 *int_nregs = 0;
5868 *sse_nregs = 0;
5869 if (!n)
5870 return 0;
5871 for (n--; n >= 0; n--)
5872 switch (regclass[n])
5873 {
5874 case X86_64_INTEGER_CLASS:
5875 case X86_64_INTEGERSI_CLASS:
5876 (*int_nregs)++;
5877 break;
5878 case X86_64_SSE_CLASS:
5879 case X86_64_SSESF_CLASS:
5880 case X86_64_SSEDF_CLASS:
5881 (*sse_nregs)++;
5882 break;
5883 case X86_64_NO_CLASS:
5884 case X86_64_SSEUP_CLASS:
5885 break;
5886 case X86_64_X87_CLASS:
5887 case X86_64_X87UP_CLASS:
5888 if (!in_return)
5889 return 0;
5890 break;
5891 case X86_64_COMPLEX_X87_CLASS:
5892 return in_return ? 2 : 0;
5893 case X86_64_MEMORY_CLASS:
5894 gcc_unreachable ();
5895 }
5896 return 1;
5897 }
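
/* Continuing the example above: for struct s { int a; int b; double d; }
   this sets *INT_NREGS to 1 and *SSE_NREGS to 1; the callers then check
   those counts against the registers still available in CUM before
   committing to register passing.  */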
5898
5899 /* Construct container for the argument used by GCC interface. See
5900 FUNCTION_ARG for the detailed description. */
5901
5902 static rtx
5903 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5904 const_tree type, int in_return, int nintregs, int nsseregs,
5905 const int *intreg, int sse_regno)
5906 {
5907 /* The following variables hold the static issued_error state. */
5908 static bool issued_sse_arg_error;
5909 static bool issued_sse_ret_error;
5910 static bool issued_x87_ret_error;
5911
5912 enum machine_mode tmpmode;
5913 int bytes =
5914 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5915 enum x86_64_reg_class regclass[MAX_CLASSES];
5916 int n;
5917 int i;
5918 int nexps = 0;
5919 int needed_sseregs, needed_intregs;
5920 rtx exp[MAX_CLASSES];
5921 rtx ret;
5922
5923 n = classify_argument (mode, type, regclass, 0);
5924 if (!n)
5925 return NULL;
5926 if (!examine_argument (mode, type, in_return, &needed_intregs,
5927 &needed_sseregs))
5928 return NULL;
5929 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5930 return NULL;
5931
5932 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5933 some less clueful developer tries to use floating-point anyway. */
5934 if (needed_sseregs && !TARGET_SSE)
5935 {
5936 if (in_return)
5937 {
5938 if (!issued_sse_ret_error)
5939 {
5940 error ("SSE register return with SSE disabled");
5941 issued_sse_ret_error = true;
5942 }
5943 }
5944 else if (!issued_sse_arg_error)
5945 {
5946 error ("SSE register argument with SSE disabled");
5947 issued_sse_arg_error = true;
5948 }
5949 return NULL;
5950 }
5951
5952 /* Likewise, error if the ABI requires us to return values in the
5953 x87 registers and the user specified -mno-80387. */
5954 if (!TARGET_80387 && in_return)
5955 for (i = 0; i < n; i++)
5956 if (regclass[i] == X86_64_X87_CLASS
5957 || regclass[i] == X86_64_X87UP_CLASS
5958 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5959 {
5960 if (!issued_x87_ret_error)
5961 {
5962 error ("x87 register return with x87 disabled");
5963 issued_x87_ret_error = true;
5964 }
5965 return NULL;
5966 }
5967
5968 /* First construct simple cases. Avoid SCmode, since we want to use
5969 single register to pass this type. */
5970 if (n == 1 && mode != SCmode)
5971 switch (regclass[0])
5972 {
5973 case X86_64_INTEGER_CLASS:
5974 case X86_64_INTEGERSI_CLASS:
5975 return gen_rtx_REG (mode, intreg[0]);
5976 case X86_64_SSE_CLASS:
5977 case X86_64_SSESF_CLASS:
5978 case X86_64_SSEDF_CLASS:
5979 if (mode != BLKmode)
5980 return gen_reg_or_parallel (mode, orig_mode,
5981 SSE_REGNO (sse_regno));
5982 break;
5983 case X86_64_X87_CLASS:
5984 case X86_64_COMPLEX_X87_CLASS:
5985 return gen_rtx_REG (mode, FIRST_STACK_REG);
5986 case X86_64_NO_CLASS:
5987 /* Zero sized array, struct or class. */
5988 return NULL;
5989 default:
5990 gcc_unreachable ();
5991 }
5992 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5993 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5994 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5995 if (n == 4
5996 && regclass[0] == X86_64_SSE_CLASS
5997 && regclass[1] == X86_64_SSEUP_CLASS
5998 && regclass[2] == X86_64_SSEUP_CLASS
5999 && regclass[3] == X86_64_SSEUP_CLASS
6000 && mode != BLKmode)
6001 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6002
6003 if (n == 2
6004 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6005 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6006 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6007 && regclass[1] == X86_64_INTEGER_CLASS
6008 && (mode == CDImode || mode == TImode || mode == TFmode)
6009 && intreg[0] + 1 == intreg[1])
6010 return gen_rtx_REG (mode, intreg[0]);
6011
6012 /* Otherwise figure out the entries of the PARALLEL. */
6013 for (i = 0; i < n; i++)
6014 {
6015 int pos;
6016
6017 switch (regclass[i])
6018 {
6019 case X86_64_NO_CLASS:
6020 break;
6021 case X86_64_INTEGER_CLASS:
6022 case X86_64_INTEGERSI_CLASS:
6023 /* Merge TImodes on aligned occasions here too. */
6024 if (i * 8 + 8 > bytes)
6025 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6026 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6027 tmpmode = SImode;
6028 else
6029 tmpmode = DImode;
6030 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
6031 if (tmpmode == BLKmode)
6032 tmpmode = DImode;
6033 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6034 gen_rtx_REG (tmpmode, *intreg),
6035 GEN_INT (i*8));
6036 intreg++;
6037 break;
6038 case X86_64_SSESF_CLASS:
6039 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6040 gen_rtx_REG (SFmode,
6041 SSE_REGNO (sse_regno)),
6042 GEN_INT (i*8));
6043 sse_regno++;
6044 break;
6045 case X86_64_SSEDF_CLASS:
6046 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6047 gen_rtx_REG (DFmode,
6048 SSE_REGNO (sse_regno)),
6049 GEN_INT (i*8));
6050 sse_regno++;
6051 break;
6052 case X86_64_SSE_CLASS:
6053 pos = i;
6054 switch (n)
6055 {
6056 case 1:
6057 tmpmode = DImode;
6058 break;
6059 case 2:
6060 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6061 {
6062 tmpmode = TImode;
6063 i++;
6064 }
6065 else
6066 tmpmode = DImode;
6067 break;
6068 case 4:
6069 gcc_assert (i == 0
6070 && regclass[1] == X86_64_SSEUP_CLASS
6071 && regclass[2] == X86_64_SSEUP_CLASS
6072 && regclass[3] == X86_64_SSEUP_CLASS);
6073 tmpmode = OImode;
6074 i += 3;
6075 break;
6076 default:
6077 gcc_unreachable ();
6078 }
6079 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6080 gen_rtx_REG (tmpmode,
6081 SSE_REGNO (sse_regno)),
6082 GEN_INT (pos*8));
6083 sse_regno++;
6084 break;
6085 default:
6086 gcc_unreachable ();
6087 }
6088 }
6089
6090 /* Empty aligned struct, union or class. */
6091 if (nexps == 0)
6092 return NULL;
6093
6094 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6095 for (i = 0; i < nexps; i++)
6096 XVECEXP (ret, 0, i) = exp [i];
6097 return ret;
6098 }
6099
6100 /* Update the data in CUM to advance over an argument of mode MODE
6101 and data type TYPE. (TYPE is null for libcalls where that information
6102 may not be available.) */
6103
6104 static void
6105 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6106 const_tree type, HOST_WIDE_INT bytes,
6107 HOST_WIDE_INT words)
6108 {
6109 switch (mode)
6110 {
6111 default:
6112 break;
6113
6114 case BLKmode:
6115 if (bytes < 0)
6116 break;
6117 /* FALLTHRU */
6118
6119 case DImode:
6120 case SImode:
6121 case HImode:
6122 case QImode:
6123 cum->words += words;
6124 cum->nregs -= words;
6125 cum->regno += words;
6126
6127 if (cum->nregs <= 0)
6128 {
6129 cum->nregs = 0;
6130 cum->regno = 0;
6131 }
6132 break;
6133
6134 case OImode:
6135 /* OImode shouldn't be used directly. */
6136 gcc_unreachable ();
6137
6138 case DFmode:
6139 if (cum->float_in_sse < 2)
6140 break;
6141 case SFmode:
6142 if (cum->float_in_sse < 1)
6143 break;
6144 /* FALLTHRU */
6145
6146 case V8SFmode:
6147 case V8SImode:
6148 case V32QImode:
6149 case V16HImode:
6150 case V4DFmode:
6151 case V4DImode:
6152 case TImode:
6153 case V16QImode:
6154 case V8HImode:
6155 case V4SImode:
6156 case V2DImode:
6157 case V4SFmode:
6158 case V2DFmode:
6159 if (!type || !AGGREGATE_TYPE_P (type))
6160 {
6161 cum->sse_words += words;
6162 cum->sse_nregs -= 1;
6163 cum->sse_regno += 1;
6164 if (cum->sse_nregs <= 0)
6165 {
6166 cum->sse_nregs = 0;
6167 cum->sse_regno = 0;
6168 }
6169 }
6170 break;
6171
6172 case V8QImode:
6173 case V4HImode:
6174 case V2SImode:
6175 case V2SFmode:
6176 case V1TImode:
6177 case V1DImode:
6178 if (!type || !AGGREGATE_TYPE_P (type))
6179 {
6180 cum->mmx_words += words;
6181 cum->mmx_nregs -= 1;
6182 cum->mmx_regno += 1;
6183 if (cum->mmx_nregs <= 0)
6184 {
6185 cum->mmx_nregs = 0;
6186 cum->mmx_regno = 0;
6187 }
6188 }
6189 break;
6190 }
6191 }
6192
6193 static void
6194 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6195 const_tree type, HOST_WIDE_INT words, bool named)
6196 {
6197 int int_nregs, sse_nregs;
6198
6199 /* Unnamed 256bit vector mode parameters are passed on stack. */
6200 if (!named && VALID_AVX256_REG_MODE (mode))
6201 return;
6202
6203 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6204 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6205 {
6206 cum->nregs -= int_nregs;
6207 cum->sse_nregs -= sse_nregs;
6208 cum->regno += int_nregs;
6209 cum->sse_regno += sse_nregs;
6210 }
6211 else
6212 {
6213 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6214 cum->words = (cum->words + align - 1) & ~(align - 1);
6215 cum->words += words;
6216 }
6217 }
6218
6219 static void
6220 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6221 HOST_WIDE_INT words)
6222 {
6223 /* Otherwise, this should be passed indirectly. */
6224 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6225
6226 cum->words += words;
6227 if (cum->nregs > 0)
6228 {
6229 cum->nregs -= 1;
6230 cum->regno += 1;
6231 }
6232 }
6233
6234 /* Update the data in CUM to advance over an argument of mode MODE and
6235 data type TYPE. (TYPE is null for libcalls where that information
6236 may not be available.) */
6237
6238 static void
6239 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6240 const_tree type, bool named)
6241 {
6242 HOST_WIDE_INT bytes, words;
6243
6244 if (mode == BLKmode)
6245 bytes = int_size_in_bytes (type);
6246 else
6247 bytes = GET_MODE_SIZE (mode);
6248 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6249
6250 if (type)
6251 mode = type_natural_mode (type, NULL);
6252
6253 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6254 function_arg_advance_ms_64 (cum, bytes, words);
6255 else if (TARGET_64BIT)
6256 function_arg_advance_64 (cum, mode, type, words, named);
6257 else
6258 function_arg_advance_32 (cum, mode, type, bytes, words);
6259 }
6260
6261 /* Define where to put the arguments to a function.
6262 Value is zero to push the argument on the stack,
6263 or a hard register in which to store the argument.
6264
6265 MODE is the argument's machine mode.
6266 TYPE is the data type of the argument (as a tree).
6267 This is null for libcalls where that information may
6268 not be available.
6269 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6270 the preceding args and about the function being called.
6271 NAMED is nonzero if this argument is a named parameter
6272 (otherwise it is an extra parameter matching an ellipsis). */
6273
6274 static rtx
6275 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6276 enum machine_mode orig_mode, const_tree type,
6277 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6278 {
6279 static bool warnedsse, warnedmmx;
6280
6281 /* Avoid the AL settings for the Unix64 ABI. */
6282 if (mode == VOIDmode)
6283 return constm1_rtx;
6284
6285 switch (mode)
6286 {
6287 default:
6288 break;
6289
6290 case BLKmode:
6291 if (bytes < 0)
6292 break;
6293 /* FALLTHRU */
6294 case DImode:
6295 case SImode:
6296 case HImode:
6297 case QImode:
6298 if (words <= cum->nregs)
6299 {
6300 int regno = cum->regno;
6301
6302 /* Fastcall allocates the first two DWORD (SImode) or
6303 smaller arguments to ECX and EDX if the argument is not
6304 an aggregate type. */
6305 if (cum->fastcall)
6306 {
6307 if (mode == BLKmode
6308 || mode == DImode
6309 || (type && AGGREGATE_TYPE_P (type)))
6310 break;
6311
6312 /* ECX not EAX is the first allocated register. */
6313 if (regno == AX_REG)
6314 regno = CX_REG;
6315 }
6316 return gen_rtx_REG (mode, regno);
6317 }
6318 break;
6319
6320 case DFmode:
6321 if (cum->float_in_sse < 2)
6322 break;
6323 case SFmode:
6324 if (cum->float_in_sse < 1)
6325 break;
6326 /* FALLTHRU */
6327 case TImode:
6328 /* In 32bit, we pass TImode in xmm registers. */
6329 case V16QImode:
6330 case V8HImode:
6331 case V4SImode:
6332 case V2DImode:
6333 case V4SFmode:
6334 case V2DFmode:
6335 if (!type || !AGGREGATE_TYPE_P (type))
6336 {
6337 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6338 {
6339 warnedsse = true;
6340 warning (0, "SSE vector argument without SSE enabled "
6341 "changes the ABI");
6342 }
6343 if (cum->sse_nregs)
6344 return gen_reg_or_parallel (mode, orig_mode,
6345 cum->sse_regno + FIRST_SSE_REG);
6346 }
6347 break;
6348
6349 case OImode:
6350 /* OImode shouldn't be used directly. */
6351 gcc_unreachable ();
6352
6353 case V8SFmode:
6354 case V8SImode:
6355 case V32QImode:
6356 case V16HImode:
6357 case V4DFmode:
6358 case V4DImode:
6359 if (!type || !AGGREGATE_TYPE_P (type))
6360 {
6361 if (cum->sse_nregs)
6362 return gen_reg_or_parallel (mode, orig_mode,
6363 cum->sse_regno + FIRST_SSE_REG);
6364 }
6365 break;
6366
6367 case V8QImode:
6368 case V4HImode:
6369 case V2SImode:
6370 case V2SFmode:
6371 case V1TImode:
6372 case V1DImode:
6373 if (!type || !AGGREGATE_TYPE_P (type))
6374 {
6375 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6376 {
6377 warnedmmx = true;
6378 warning (0, "MMX vector argument without MMX enabled "
6379 "changes the ABI");
6380 }
6381 if (cum->mmx_nregs)
6382 return gen_reg_or_parallel (mode, orig_mode,
6383 cum->mmx_regno + FIRST_MMX_REG);
6384 }
6385 break;
6386 }
6387
6388 return NULL_RTX;
6389 }
6390
6391 static rtx
6392 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6393 enum machine_mode orig_mode, const_tree type, bool named)
6394 {
6395 /* Handle a hidden AL argument containing number of registers
6396 for varargs x86-64 functions. */
6397 if (mode == VOIDmode)
6398 return GEN_INT (cum->maybe_vaarg
6399 ? (cum->sse_nregs < 0
6400 ? X86_64_SSE_REGPARM_MAX
6401 : cum->sse_regno)
6402 : -1);
6403
6404 switch (mode)
6405 {
6406 default:
6407 break;
6408
6409 case V8SFmode:
6410 case V8SImode:
6411 case V32QImode:
6412 case V16HImode:
6413 case V4DFmode:
6414 case V4DImode:
6415 /* Unnamed 256bit vector mode parameters are passed on stack. */
6416 if (!named)
6417 return NULL;
6418 break;
6419 }
6420
6421 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6422 cum->sse_nregs,
6423 &x86_64_int_parameter_registers [cum->regno],
6424 cum->sse_regno);
6425 }
6426
6427 static rtx
6428 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6429 enum machine_mode orig_mode, bool named,
6430 HOST_WIDE_INT bytes)
6431 {
6432 unsigned int regno;
6433
6434 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6435 We use a value of -2 to specify that the current function call is MS ABI. */
6436 if (mode == VOIDmode)
6437 return GEN_INT (-2);
6438
6439 /* If we've run out of registers, it goes on the stack. */
6440 if (cum->nregs == 0)
6441 return NULL_RTX;
6442
6443 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6444
6445 /* Only floating point modes are passed in anything but integer regs. */
6446 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6447 {
6448 if (named)
6449 regno = cum->regno + FIRST_SSE_REG;
6450 else
6451 {
6452 rtx t1, t2;
6453
6454 /* Unnamed floating parameters are passed in both the
6455 SSE and integer registers. */
6456 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6457 t2 = gen_rtx_REG (mode, regno);
6458 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6459 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6460 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6461 }
6462 }
6463 /* Handle aggregate types passed in registers. */
6464 if (orig_mode == BLKmode)
6465 {
6466 if (bytes > 0 && bytes <= 8)
6467 mode = (bytes > 4 ? DImode : SImode);
6468 if (mode == BLKmode)
6469 mode = DImode;
6470 }
6471
6472 return gen_reg_or_parallel (mode, orig_mode, regno);
6473 }
6474
6475 /* Return where to put the arguments to a function.
6476 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6477
6478 MODE is the argument's machine mode. TYPE is the data type of the
6479 argument. It is null for libcalls where that information may not be
6480 available. CUM gives information about the preceding args and about
6481 the function being called. NAMED is nonzero if this argument is a
6482 named parameter (otherwise it is an extra parameter matching an
6483 ellipsis). */
6484
6485 static rtx
6486 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6487 const_tree type, bool named)
6488 {
6489 enum machine_mode mode = omode;
6490 HOST_WIDE_INT bytes, words;
6491
6492 if (mode == BLKmode)
6493 bytes = int_size_in_bytes (type);
6494 else
6495 bytes = GET_MODE_SIZE (mode);
6496 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6497
6498 /* To simplify the code below, represent vector types with a vector mode
6499 even if MMX/SSE are not active. */
6500 if (type && TREE_CODE (type) == VECTOR_TYPE)
6501 mode = type_natural_mode (type, cum);
6502
6503 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6504 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6505 else if (TARGET_64BIT)
6506 return function_arg_64 (cum, mode, omode, type, named);
6507 else
6508 return function_arg_32 (cum, mode, omode, type, bytes, words);
6509 }
6510
6511 /* A C expression that indicates when an argument must be passed by
6512 reference. If nonzero for an argument, a copy of that argument is
6513 made in memory and a pointer to the argument is passed instead of
6514 the argument itself. The pointer is passed in whatever way is
6515 appropriate for passing a pointer to that type. */
6516
6517 static bool
6518 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6519 enum machine_mode mode ATTRIBUTE_UNUSED,
6520 const_tree type, bool named ATTRIBUTE_UNUSED)
6521 {
6522 /* See Windows x64 Software Convention. */
6523 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6524 {
6525 int msize = (int) GET_MODE_SIZE (mode);
6526 if (type)
6527 {
6528 /* Arrays are passed by reference. */
6529 if (TREE_CODE (type) == ARRAY_TYPE)
6530 return true;
6531
6532 if (AGGREGATE_TYPE_P (type))
6533 {
6534 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6535 are passed by reference. */
6536 msize = int_size_in_bytes (type);
6537 }
6538 }
6539
6540 /* __m128 is passed by reference. */
6541 switch (msize) {
6542 case 1: case 2: case 4: case 8:
6543 break;
6544 default:
6545 return true;
6546 }
6547 }
6548 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6549 return 1;
6550
6551 return 0;
6552 }
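
/* Sketching the Windows x64 rule with hypothetical parameter types: an
   8-byte struct or a plain double is passed by value (it fits one of
   the 1/2/4/8-byte buckets), while a 12-byte struct, an array
   parameter, or a 16-byte __m128 value is copied to memory and a
   pointer to the copy is passed instead.  */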
6553
6554 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6555 ABI. */
6556 static bool
6557 contains_aligned_value_p (const_tree type)
6558 {
6559 enum machine_mode mode = TYPE_MODE (type);
6560 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6561 || mode == TDmode
6562 || mode == TFmode
6563 || mode == TCmode)
6564 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6565 return true;
6566 if (TYPE_ALIGN (type) < 128)
6567 return false;
6568
6569 if (AGGREGATE_TYPE_P (type))
6570 {
6571 /* Walk the aggregates recursively. */
6572 switch (TREE_CODE (type))
6573 {
6574 case RECORD_TYPE:
6575 case UNION_TYPE:
6576 case QUAL_UNION_TYPE:
6577 {
6578 tree field;
6579
6580 /* Walk all the structure fields. */
6581 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6582 {
6583 if (TREE_CODE (field) == FIELD_DECL
6584 && contains_aligned_value_p (TREE_TYPE (field)))
6585 return true;
6586 }
6587 break;
6588 }
6589
6590 case ARRAY_TYPE:
6591 /* Just for use if some languages pass arrays by value. */
6592 if (contains_aligned_value_p (TREE_TYPE (type)))
6593 return true;
6594 break;
6595
6596 default:
6597 gcc_unreachable ();
6598 }
6599 }
6600 return false;
6601 }
6602
6603 /* Gives the alignment boundary, in bits, of an argument with the
6604 specified mode and type. */
6605
6606 int
6607 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6608 {
6609 int align;
6610 if (type)
6611 {
6612 /* Since the main variant type is used for the call, convert TYPE
6613 to its main variant. */
6614 type = TYPE_MAIN_VARIANT (type);
6615 align = TYPE_ALIGN (type);
6616 }
6617 else
6618 align = GET_MODE_ALIGNMENT (mode);
6619 if (align < PARM_BOUNDARY)
6620 align = PARM_BOUNDARY;
6621 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6622 natural boundaries. */
6623 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6624 {
6625 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6626 make an exception for SSE modes since these require 128bit
6627 alignment.
6628
6629 The handling here differs from field_alignment. ICC aligns MMX
6630 arguments to 4 byte boundaries, while structure fields are aligned
6631 to 8 byte boundaries. */
6632 if (!type)
6633 {
6634 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6635 align = PARM_BOUNDARY;
6636 }
6637 else
6638 {
6639 if (!contains_aligned_value_p (type))
6640 align = PARM_BOUNDARY;
6641 }
6642 }
6643 if (align > BIGGEST_ALIGNMENT)
6644 align = BIGGEST_ALIGNMENT;
6645 return align;
6646 }
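
/* A couple of illustrative outcomes for 32-bit code (assumed example
   types): a double argument is aligned to PARM_BOUNDARY (4 bytes)
   despite its natural 8-byte alignment, while an __m128 argument with
   SSE enabled keeps its 128-bit alignment, as does __float128 (TFmode).
   In 64-bit mode the natural alignment is used directly, capped at
   BIGGEST_ALIGNMENT.  */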
6647
6648 /* Return true if N is a possible register number of function value. */
6649
6650 static bool
6651 ix86_function_value_regno_p (const unsigned int regno)
6652 {
6653 switch (regno)
6654 {
6655 case 0:
6656 return true;
6657
6658 case FIRST_FLOAT_REG:
6659 /* TODO: The function should depend on current function ABI but
6660 builtins.c would need updating then. Therefore we use the
6661 default ABI. */
6662 if (TARGET_64BIT && ix86_abi == MS_ABI)
6663 return false;
6664 return TARGET_FLOAT_RETURNS_IN_80387;
6665
6666 case FIRST_SSE_REG:
6667 return TARGET_SSE;
6668
6669 case FIRST_MMX_REG:
6670 if (TARGET_MACHO || TARGET_64BIT)
6671 return false;
6672 return TARGET_MMX;
6673 }
6674
6675 return false;
6676 }
6677
6678 /* Define how to find the value returned by a function.
6679 VALTYPE is the data type of the value (as a tree).
6680 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6681 otherwise, FUNC is 0. */
6682
6683 static rtx
6684 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6685 const_tree fntype, const_tree fn)
6686 {
6687 unsigned int regno;
6688
6689 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6690 we normally prevent this case when mmx is not available. However
6691 some ABIs may require the result to be returned like DImode. */
6692 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6693 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6694
6695 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6696 we prevent this case when sse is not available. However some ABIs
6697 may require the result to be returned like integer TImode. */
6698 else if (mode == TImode
6699 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6700 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6701
6702 /* 32-byte vector modes in %ymm0. */
6703 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6704 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6705
6706 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6707 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6708 regno = FIRST_FLOAT_REG;
6709 else
6710 /* Most things go in %eax. */
6711 regno = AX_REG;
6712
6713 /* Override FP return register with %xmm0 for local functions when
6714 SSE math is enabled or for functions with sseregparm attribute. */
6715 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6716 {
6717 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6718 if ((sse_level >= 1 && mode == SFmode)
6719 || (sse_level == 2 && mode == DFmode))
6720 regno = FIRST_SSE_REG;
6721 }
6722
6723 /* OImode shouldn't be used directly. */
6724 gcc_assert (mode != OImode);
6725
6726 return gen_rtx_REG (orig_mode, regno);
6727 }
6728
6729 static rtx
6730 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6731 const_tree valtype)
6732 {
6733 rtx ret;
6734
6735 /* Handle libcalls, which don't provide a type node. */
6736 if (valtype == NULL)
6737 {
6738 switch (mode)
6739 {
6740 case SFmode:
6741 case SCmode:
6742 case DFmode:
6743 case DCmode:
6744 case TFmode:
6745 case SDmode:
6746 case DDmode:
6747 case TDmode:
6748 return gen_rtx_REG (mode, FIRST_SSE_REG);
6749 case XFmode:
6750 case XCmode:
6751 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6752 case TCmode:
6753 return NULL;
6754 default:
6755 return gen_rtx_REG (mode, AX_REG);
6756 }
6757 }
6758
6759 ret = construct_container (mode, orig_mode, valtype, 1,
6760 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6761 x86_64_int_return_registers, 0);
6762
6763 /* For zero sized structures, construct_container returns NULL, but we
6764 need to keep the rest of the compiler happy by returning a meaningful value. */
6765 if (!ret)
6766 ret = gen_rtx_REG (orig_mode, AX_REG);
6767
6768 return ret;
6769 }
6770
6771 static rtx
6772 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6773 {
6774 unsigned int regno = AX_REG;
6775
6776 if (TARGET_SSE)
6777 {
6778 switch (GET_MODE_SIZE (mode))
6779 {
6780 case 16:
6781 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6782 && !COMPLEX_MODE_P (mode))
6783 regno = FIRST_SSE_REG;
6784 break;
6785 case 8:
6786 case 4:
6787 if (mode == SFmode || mode == DFmode)
6788 regno = FIRST_SSE_REG;
6789 break;
6790 default:
6791 break;
6792 }
6793 }
6794 return gen_rtx_REG (orig_mode, regno);
6795 }
6796
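/* Common worker for ix86_function_value and ix86_libcall_value:
   dispatch on the target and the function's calling ABI.
   FNTYPE_OR_DECL is the called function's FUNCTION_DECL or type, or NULL. */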
6797 static rtx
6798 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6799 enum machine_mode orig_mode, enum machine_mode mode)
6800 {
6801 const_tree fn, fntype;
6802
6803 fn = NULL_TREE;
6804 if (fntype_or_decl && DECL_P (fntype_or_decl))
6805 fn = fntype_or_decl;
6806 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6807
6808 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6809 return function_value_ms_64 (orig_mode, mode);
6810 else if (TARGET_64BIT)
6811 return function_value_64 (orig_mode, mode, valtype);
6812 else
6813 return function_value_32 (orig_mode, mode, fntype, fn);
6814 }
6815
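/* Return the rtx for the value returned by a function whose return type
   is VALTYPE.  FNTYPE_OR_DECL is the function's type or FUNCTION_DECL,
   or NULL when neither is known. */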
6816 static rtx
6817 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6818 bool outgoing ATTRIBUTE_UNUSED)
6819 {
6820 enum machine_mode mode, orig_mode;
6821
6822 orig_mode = TYPE_MODE (valtype);
6823 mode = type_natural_mode (valtype, NULL);
6824 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6825 }
6826
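/* Return the rtx for the value returned by a libcall returning MODE,
   for which no type information is available. */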
6827 rtx
6828 ix86_libcall_value (enum machine_mode mode)
6829 {
6830 return ix86_function_value_1 (NULL, NULL, mode, mode);
6831 }
6832
6833 /* Return true iff type is returned in memory. */
6834
6835 static bool ATTRIBUTE_UNUSED
6836 return_in_memory_32 (const_tree type, enum machine_mode mode)
6837 {
6838 HOST_WIDE_INT size;
6839
6840 if (mode == BLKmode)
6841 return true;
6842
6843 size = int_size_in_bytes (type);
6844
6845 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6846 return false;
6847
6848 if (VECTOR_MODE_P (mode) || mode == TImode)
6849 {
6850 /* User-created vectors small enough to fit in EAX. */
6851 if (size < 8)
6852 return false;
6853
6854 /* MMX/3dNow values are returned in MM0,
6855 except when it doesn't exist or the ABI prescribes otherwise. */
6856 if (size == 8)
6857 return !TARGET_MMX || TARGET_VECT8_RETURNS;
6858
6859 /* SSE values are returned in XMM0, except when it doesn't exist. */
6860 if (size == 16)
6861 return !TARGET_SSE;
6862
6863 /* AVX values are returned in YMM0, except when it doesn't exist. */
6864 if (size == 32)
6865 return !TARGET_AVX;
6866 }
6867
6868 if (mode == XFmode)
6869 return false;
6870
6871 if (size > 12)
6872 return true;
6873
6874 /* OImode shouldn't be used directly. */
6875 gcc_assert (mode != OImode);
6876
6877 return false;
6878 }
6879
6880 static bool ATTRIBUTE_UNUSED
6881 return_in_memory_64 (const_tree type, enum machine_mode mode)
6882 {
6883 int needed_intregs, needed_sseregs;
6884 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6885 }
6886
6887 static bool ATTRIBUTE_UNUSED
6888 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6889 {
6890 HOST_WIDE_INT size = int_size_in_bytes (type);
6891
6892 /* __m128 is returned in xmm0. */
6893 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6894 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6895 return false;
6896
6897 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
6898 return size != 1 && size != 2 && size != 4 && size != 8;
6899 }
6900
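/* Top-level worker deciding whether a value of the given type is returned
   in memory, dispatching on the target and its calling ABI. */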
6901 static bool
6902 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6903 {
6904 #ifdef SUBTARGET_RETURN_IN_MEMORY
6905 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6906 #else
6907 const enum machine_mode mode = type_natural_mode (type, NULL);
6908
6909 if (TARGET_64BIT)
6910 {
6911 if (ix86_function_type_abi (fntype) == MS_ABI)
6912 return return_in_memory_ms_64 (type, mode);
6913 else
6914 return return_in_memory_64 (type, mode);
6915 }
6916 else
6917 return return_in_memory_32 (type, mode);
6918 #endif
6919 }
6920
6921 /* When returning SSE vector types, we have a choice of either
6922 (1) being ABI-incompatible with a -march switch, or
6923 (2) generating an error.
6924 Given no good solution, I think the safest thing is one warning.
6925 The user won't be able to use -Werror, but....
6926
6927 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6928 called in response to actually generating a caller or callee that
6929 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6930 via aggregate_value_p for general type probing from tree-ssa. */
6931
6932 static rtx
6933 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6934 {
6935 static bool warnedsse, warnedmmx;
6936
6937 if (!TARGET_64BIT && type)
6938 {
6939 /* Look at the return type of the function, not the function type. */
6940 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6941
6942 if (!TARGET_SSE && !warnedsse)
6943 {
6944 if (mode == TImode
6945 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6946 {
6947 warnedsse = true;
6948 warning (0, "SSE vector return without SSE enabled "
6949 "changes the ABI");
6950 }
6951 }
6952
6953 if (!TARGET_MMX && !warnedmmx)
6954 {
6955 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6956 {
6957 warnedmmx = true;
6958 warning (0, "MMX vector return without MMX enabled "
6959 "changes the ABI");
6960 }
6961 }
6962 }
6963
6964 return NULL;
6965 }
6966
6967 \f
6968 /* Create the va_list data type. */
6969
6970 /* Return the calling convention specific va_list data type.
6971 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6972
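/* For the 64-bit SysV ABI the record built below corresponds roughly to
   the following C declaration (a sketch for reference only; the compiler
   works with the tree nodes, not with this source):

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;

   and the builtin va_list type is an array of one such element. */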
6973 static tree
6974 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6975 {
6976 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6977
6978 /* For i386 we use plain pointer to argument area. */
6979 if (!TARGET_64BIT || abi == MS_ABI)
6980 return build_pointer_type (char_type_node);
6981
6982 record = lang_hooks.types.make_type (RECORD_TYPE);
6983 type_decl = build_decl (BUILTINS_LOCATION,
6984 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6985
6986 f_gpr = build_decl (BUILTINS_LOCATION,
6987 FIELD_DECL, get_identifier ("gp_offset"),
6988 unsigned_type_node);
6989 f_fpr = build_decl (BUILTINS_LOCATION,
6990 FIELD_DECL, get_identifier ("fp_offset"),
6991 unsigned_type_node);
6992 f_ovf = build_decl (BUILTINS_LOCATION,
6993 FIELD_DECL, get_identifier ("overflow_arg_area"),
6994 ptr_type_node);
6995 f_sav = build_decl (BUILTINS_LOCATION,
6996 FIELD_DECL, get_identifier ("reg_save_area"),
6997 ptr_type_node);
6998
6999 va_list_gpr_counter_field = f_gpr;
7000 va_list_fpr_counter_field = f_fpr;
7001
7002 DECL_FIELD_CONTEXT (f_gpr) = record;
7003 DECL_FIELD_CONTEXT (f_fpr) = record;
7004 DECL_FIELD_CONTEXT (f_ovf) = record;
7005 DECL_FIELD_CONTEXT (f_sav) = record;
7006
7007 TREE_CHAIN (record) = type_decl;
7008 TYPE_NAME (record) = type_decl;
7009 TYPE_FIELDS (record) = f_gpr;
7010 DECL_CHAIN (f_gpr) = f_fpr;
7011 DECL_CHAIN (f_fpr) = f_ovf;
7012 DECL_CHAIN (f_ovf) = f_sav;
7013
7014 layout_type (record);
7015
7016 /* The correct type is an array type of one element. */
7017 return build_array_type (record, build_index_type (size_zero_node));
7018 }
7019
7020 /* Set up the builtin va_list data type and, for 64-bit, the additional
7021 calling-convention-specific va_list data types. */
7022
7023 static tree
7024 ix86_build_builtin_va_list (void)
7025 {
7026 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7027
7028 /* Initialize abi specific va_list builtin types. */
7029 if (TARGET_64BIT)
7030 {
7031 tree t;
7032 if (ix86_abi == MS_ABI)
7033 {
7034 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7035 if (TREE_CODE (t) != RECORD_TYPE)
7036 t = build_variant_type_copy (t);
7037 sysv_va_list_type_node = t;
7038 }
7039 else
7040 {
7041 t = ret;
7042 if (TREE_CODE (t) != RECORD_TYPE)
7043 t = build_variant_type_copy (t);
7044 sysv_va_list_type_node = t;
7045 }
7046 if (ix86_abi != MS_ABI)
7047 {
7048 t = ix86_build_builtin_va_list_abi (MS_ABI);
7049 if (TREE_CODE (t) != RECORD_TYPE)
7050 t = build_variant_type_copy (t);
7051 ms_va_list_type_node = t;
7052 }
7053 else
7054 {
7055 t = ret;
7056 if (TREE_CODE (t) != RECORD_TYPE)
7057 t = build_variant_type_copy (t);
7058 ms_va_list_type_node = t;
7059 }
7060 }
7061
7062 return ret;
7063 }
7064
7065 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7066
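/* For the 64-bit SysV ABI this stores, roughly, the still-unused GP
   argument registers into the register save area at
   frame_pointer + i * UNITS_PER_WORD and then, guarded by a runtime test
   of %al (the number of SSE argument registers the caller actually used),
   the SSE argument registers 16 bytes apart above them. */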
7067 static void
7068 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7069 {
7070 rtx save_area, mem;
7071 alias_set_type set;
7072 int i, max;
7073
7074 /* GPR size of varargs save area. */
7075 if (cfun->va_list_gpr_size)
7076 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7077 else
7078 ix86_varargs_gpr_size = 0;
7079
7080 /* FPR size of varargs save area. We don't need it if we don't pass
7081 anything in SSE registers. */
7082 if (TARGET_SSE && cfun->va_list_fpr_size)
7083 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7084 else
7085 ix86_varargs_fpr_size = 0;
7086
7087 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7088 return;
7089
7090 save_area = frame_pointer_rtx;
7091 set = get_varargs_alias_set ();
7092
7093 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7094 if (max > X86_64_REGPARM_MAX)
7095 max = X86_64_REGPARM_MAX;
7096
7097 for (i = cum->regno; i < max; i++)
7098 {
7099 mem = gen_rtx_MEM (Pmode,
7100 plus_constant (save_area, i * UNITS_PER_WORD));
7101 MEM_NOTRAP_P (mem) = 1;
7102 set_mem_alias_set (mem, set);
7103 emit_move_insn (mem, gen_rtx_REG (Pmode,
7104 x86_64_int_parameter_registers[i]));
7105 }
7106
7107 if (ix86_varargs_fpr_size)
7108 {
7109 enum machine_mode smode;
7110 rtx label, test;
7111
7112 /* Now emit code to save SSE registers. The AX parameter contains the
7113 number of SSE parameter registers used to call this function, though all
7114 we actually check here is the zero/non-zero status. */
7115
7116 label = gen_label_rtx ();
7117 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7118 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7119 label));
7120
7121 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7122 we used movdqa (i.e. TImode) instead? Perhaps even better would
7123 be if we could determine the real mode of the data, via a hook
7124 into pass_stdarg. Ignore all that for now. */
7125 smode = V4SFmode;
7126 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7127 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7128
7129 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7130 if (max > X86_64_SSE_REGPARM_MAX)
7131 max = X86_64_SSE_REGPARM_MAX;
7132
7133 for (i = cum->sse_regno; i < max; ++i)
7134 {
7135 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7136 mem = gen_rtx_MEM (smode, mem);
7137 MEM_NOTRAP_P (mem) = 1;
7138 set_mem_alias_set (mem, set);
7139 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7140
7141 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7142 }
7143
7144 emit_label (label);
7145 }
7146 }
7147
7148 static void
7149 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7150 {
7151 alias_set_type set = get_varargs_alias_set ();
7152 int i;
7153
7154 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7155 {
7156 rtx reg, mem;
7157
7158 mem = gen_rtx_MEM (Pmode,
7159 plus_constant (virtual_incoming_args_rtx,
7160 i * UNITS_PER_WORD));
7161 MEM_NOTRAP_P (mem) = 1;
7162 set_mem_alias_set (mem, set);
7163
7164 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7165 emit_move_insn (mem, reg);
7166 }
7167 }
7168
7169 static void
7170 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7171 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7172 int no_rtl)
7173 {
7174 CUMULATIVE_ARGS next_cum;
7175 tree fntype;
7176
7177 /* This argument doesn't appear to be used anymore, which is good,
7178 because the old code here didn't suppress rtl generation. */
7179 gcc_assert (!no_rtl);
7180
7181 if (!TARGET_64BIT)
7182 return;
7183
7184 fntype = TREE_TYPE (current_function_decl);
7185
7186 /* For varargs, we do not want to skip the dummy va_dcl argument.
7187 For stdargs, we do want to skip the last named argument. */
7188 next_cum = *cum;
7189 if (stdarg_p (fntype))
7190 ix86_function_arg_advance (&next_cum, mode, type, true);
7191
7192 if (cum->call_abi == MS_ABI)
7193 setup_incoming_varargs_ms_64 (&next_cum);
7194 else
7195 setup_incoming_varargs_64 (&next_cum);
7196 }
7197
7198 /* Return true if TYPE is a va_list whose representation is a plain char pointer. */
7199
7200 static bool
7201 is_va_list_char_pointer (tree type)
7202 {
7203 tree canonic;
7204
7205 /* For 32-bit it is always true. */
7206 if (!TARGET_64BIT)
7207 return true;
7208 canonic = ix86_canonical_va_list_type (type);
7209 return (canonic == ms_va_list_type_node
7210 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7211 }
7212
7213 /* Implement va_start. */
7214
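/* For the 64-bit SysV ABI the code below initializes the four
   __va_list_tag fields roughly as follows:

     gp_offset         = <named GP argument registers used> * 8
     fp_offset         = X86_64_REGPARM_MAX * 8
                         + <named SSE argument registers used> * 16
     overflow_arg_area = address of the first stack-passed argument
     reg_save_area     = the register save area established by
                         setup_incoming_varargs_64

   For 32-bit targets and for char-pointer va_lists, the standard
   expander (or its split-stack variant) is used instead. */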
7215 static void
7216 ix86_va_start (tree valist, rtx nextarg)
7217 {
7218 HOST_WIDE_INT words, n_gpr, n_fpr;
7219 tree f_gpr, f_fpr, f_ovf, f_sav;
7220 tree gpr, fpr, ovf, sav, t;
7221 tree type;
7222
7223 rtx ovf_rtx;
7224
7225 if (flag_split_stack
7226 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7227 {
7228 unsigned int scratch_regno;
7229
7230 /* When we are splitting the stack, we can't refer to the stack
7231 arguments using internal_arg_pointer, because they may be on
7232 the old stack. The split stack prologue will arrange to
7233 leave a pointer to the old stack arguments in a scratch
7234 register, which we here copy to a pseudo-register. The split
7235 stack prologue can't set the pseudo-register directly because
7236 it (the prologue) runs before any registers have been saved. */
7237
7238 scratch_regno = split_stack_prologue_scratch_regno ();
7239 if (scratch_regno != INVALID_REGNUM)
7240 {
7241 rtx reg, seq;
7242
7243 reg = gen_reg_rtx (Pmode);
7244 cfun->machine->split_stack_varargs_pointer = reg;
7245
7246 start_sequence ();
7247 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7248 seq = get_insns ();
7249 end_sequence ();
7250
7251 push_topmost_sequence ();
7252 emit_insn_after (seq, entry_of_function ());
7253 pop_topmost_sequence ();
7254 }
7255 }
7256
7257 /* Only the 64-bit target needs something special. */
7258 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7259 {
7260 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7261 std_expand_builtin_va_start (valist, nextarg);
7262 else
7263 {
7264 rtx va_r, next;
7265
7266 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7267 next = expand_binop (ptr_mode, add_optab,
7268 cfun->machine->split_stack_varargs_pointer,
7269 crtl->args.arg_offset_rtx,
7270 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7271 convert_move (va_r, next, 0);
7272 }
7273 return;
7274 }
7275
7276 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7277 f_fpr = DECL_CHAIN (f_gpr);
7278 f_ovf = DECL_CHAIN (f_fpr);
7279 f_sav = DECL_CHAIN (f_ovf);
7280
7281 valist = build_simple_mem_ref (valist);
7282 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7283 /* The following should be folded into the MEM_REF offset. */
7284 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7285 f_gpr, NULL_TREE);
7286 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7287 f_fpr, NULL_TREE);
7288 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7289 f_ovf, NULL_TREE);
7290 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7291 f_sav, NULL_TREE);
7292
7293 /* Count number of gp and fp argument registers used. */
7294 words = crtl->args.info.words;
7295 n_gpr = crtl->args.info.regno;
7296 n_fpr = crtl->args.info.sse_regno;
7297
7298 if (cfun->va_list_gpr_size)
7299 {
7300 type = TREE_TYPE (gpr);
7301 t = build2 (MODIFY_EXPR, type,
7302 gpr, build_int_cst (type, n_gpr * 8));
7303 TREE_SIDE_EFFECTS (t) = 1;
7304 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7305 }
7306
7307 if (TARGET_SSE && cfun->va_list_fpr_size)
7308 {
7309 type = TREE_TYPE (fpr);
7310 t = build2 (MODIFY_EXPR, type, fpr,
7311 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7312 TREE_SIDE_EFFECTS (t) = 1;
7313 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7314 }
7315
7316 /* Find the overflow area. */
7317 type = TREE_TYPE (ovf);
7318 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7319 ovf_rtx = crtl->args.internal_arg_pointer;
7320 else
7321 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7322 t = make_tree (type, ovf_rtx);
7323 if (words != 0)
7324 t = build2 (POINTER_PLUS_EXPR, type, t,
7325 size_int (words * UNITS_PER_WORD));
7326 t = build2 (MODIFY_EXPR, type, ovf, t);
7327 TREE_SIDE_EFFECTS (t) = 1;
7328 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7329
7330 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7331 {
7332 /* Find the register save area.
7333 The function prologue saves it right above the stack frame. */
7334 type = TREE_TYPE (sav);
7335 t = make_tree (type, frame_pointer_rtx);
7336 if (!ix86_varargs_gpr_size)
7337 t = build2 (POINTER_PLUS_EXPR, type, t,
7338 size_int (-8 * X86_64_REGPARM_MAX));
7339 t = build2 (MODIFY_EXPR, type, sav, t);
7340 TREE_SIDE_EFFECTS (t) = 1;
7341 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7342 }
7343 }
7344
7345 /* Implement va_arg. */
7346
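/* For a value passed entirely in GP registers, the gimplified expansion
   below is roughly equivalent to (a sketch, not literal output):

     if (gp_offset >= (X86_64_REGPARM_MAX - needed_intregs + 1) * 8)
       goto stack;
     addr = reg_save_area + gp_offset;
     gp_offset += needed_intregs * 8;
     goto done;
   stack:
     addr = overflow_arg_area;            aligned first if required
     overflow_arg_area += rounded size of the type;
   done:
     result = *(TYPE *) addr;

   SSE-class values use fp_offset and 16-byte slots instead, and values
   split across both register classes are first copied into a temporary. */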
7347 static tree
7348 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7349 gimple_seq *post_p)
7350 {
7351 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7352 tree f_gpr, f_fpr, f_ovf, f_sav;
7353 tree gpr, fpr, ovf, sav, t;
7354 int size, rsize;
7355 tree lab_false, lab_over = NULL_TREE;
7356 tree addr, t2;
7357 rtx container;
7358 int indirect_p = 0;
7359 tree ptrtype;
7360 enum machine_mode nat_mode;
7361 unsigned int arg_boundary;
7362
7363 /* Only the 64-bit target needs something special. */
7364 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7365 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7366
7367 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7368 f_fpr = DECL_CHAIN (f_gpr);
7369 f_ovf = DECL_CHAIN (f_fpr);
7370 f_sav = DECL_CHAIN (f_ovf);
7371
7372 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7373 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7374 valist = build_va_arg_indirect_ref (valist);
7375 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7376 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7377 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7378
7379 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7380 if (indirect_p)
7381 type = build_pointer_type (type);
7382 size = int_size_in_bytes (type);
7383 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7384
7385 nat_mode = type_natural_mode (type, NULL);
7386 switch (nat_mode)
7387 {
7388 case V8SFmode:
7389 case V8SImode:
7390 case V32QImode:
7391 case V16HImode:
7392 case V4DFmode:
7393 case V4DImode:
7394 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
7395 if (ix86_cfun_abi () == SYSV_ABI)
7396 {
7397 container = NULL;
7398 break;
7399 }
7400
7401 default:
7402 container = construct_container (nat_mode, TYPE_MODE (type),
7403 type, 0, X86_64_REGPARM_MAX,
7404 X86_64_SSE_REGPARM_MAX, intreg,
7405 0);
7406 break;
7407 }
7408
7409 /* Pull the value out of the saved registers. */
7410
7411 addr = create_tmp_var (ptr_type_node, "addr");
7412
7413 if (container)
7414 {
7415 int needed_intregs, needed_sseregs;
7416 bool need_temp;
7417 tree int_addr, sse_addr;
7418
7419 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7420 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7421
7422 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7423
7424 need_temp = (!REG_P (container)
7425 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7426 || TYPE_ALIGN (type) > 128));
7427
7428 /* In case we are passing a structure, verify that it occupies a consecutive
7429 block of the register save area. If not, we need to do moves. */
7430 if (!need_temp && !REG_P (container))
7431 {
7432 /* Verify that all registers are strictly consecutive. */
7433 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7434 {
7435 int i;
7436
7437 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7438 {
7439 rtx slot = XVECEXP (container, 0, i);
7440 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7441 || INTVAL (XEXP (slot, 1)) != i * 16)
7442 need_temp = 1;
7443 }
7444 }
7445 else
7446 {
7447 int i;
7448
7449 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7450 {
7451 rtx slot = XVECEXP (container, 0, i);
7452 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7453 || INTVAL (XEXP (slot, 1)) != i * 8)
7454 need_temp = 1;
7455 }
7456 }
7457 }
7458 if (!need_temp)
7459 {
7460 int_addr = addr;
7461 sse_addr = addr;
7462 }
7463 else
7464 {
7465 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7466 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7467 }
7468
7469 /* First ensure that we fit completely in registers. */
7470 if (needed_intregs)
7471 {
7472 t = build_int_cst (TREE_TYPE (gpr),
7473 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7474 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7475 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7476 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7477 gimplify_and_add (t, pre_p);
7478 }
7479 if (needed_sseregs)
7480 {
7481 t = build_int_cst (TREE_TYPE (fpr),
7482 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7483 + X86_64_REGPARM_MAX * 8);
7484 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7485 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7486 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7487 gimplify_and_add (t, pre_p);
7488 }
7489
7490 /* Compute index to start of area used for integer regs. */
7491 if (needed_intregs)
7492 {
7493 /* int_addr = gpr + sav; */
7494 t = fold_convert (sizetype, gpr);
7495 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7496 gimplify_assign (int_addr, t, pre_p);
7497 }
7498 if (needed_sseregs)
7499 {
7500 /* sse_addr = fpr + sav; */
7501 t = fold_convert (sizetype, fpr);
7502 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7503 gimplify_assign (sse_addr, t, pre_p);
7504 }
7505 if (need_temp)
7506 {
7507 int i, prev_size = 0;
7508 tree temp = create_tmp_var (type, "va_arg_tmp");
7509
7510 /* addr = &temp; */
7511 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7512 gimplify_assign (addr, t, pre_p);
7513
7514 for (i = 0; i < XVECLEN (container, 0); i++)
7515 {
7516 rtx slot = XVECEXP (container, 0, i);
7517 rtx reg = XEXP (slot, 0);
7518 enum machine_mode mode = GET_MODE (reg);
7519 tree piece_type;
7520 tree addr_type;
7521 tree daddr_type;
7522 tree src_addr, src;
7523 int src_offset;
7524 tree dest_addr, dest;
7525 int cur_size = GET_MODE_SIZE (mode);
7526
7527 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7528 prev_size = INTVAL (XEXP (slot, 1));
7529 if (prev_size + cur_size > size)
7530 {
7531 cur_size = size - prev_size;
7532 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7533 if (mode == BLKmode)
7534 mode = QImode;
7535 }
7536 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7537 if (mode == GET_MODE (reg))
7538 addr_type = build_pointer_type (piece_type);
7539 else
7540 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7541 true);
7542 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7543 true);
7544
7545 if (SSE_REGNO_P (REGNO (reg)))
7546 {
7547 src_addr = sse_addr;
7548 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7549 }
7550 else
7551 {
7552 src_addr = int_addr;
7553 src_offset = REGNO (reg) * 8;
7554 }
7555 src_addr = fold_convert (addr_type, src_addr);
7556 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7557 size_int (src_offset));
7558
7559 dest_addr = fold_convert (daddr_type, addr);
7560 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7561 size_int (prev_size));
7562 if (cur_size == GET_MODE_SIZE (mode))
7563 {
7564 src = build_va_arg_indirect_ref (src_addr);
7565 dest = build_va_arg_indirect_ref (dest_addr);
7566
7567 gimplify_assign (dest, src, pre_p);
7568 }
7569 else
7570 {
7571 tree copy
7572 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7573 3, dest_addr, src_addr,
7574 size_int (cur_size));
7575 gimplify_and_add (copy, pre_p);
7576 }
7577 prev_size += cur_size;
7578 }
7579 }
7580
7581 if (needed_intregs)
7582 {
7583 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7584 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7585 gimplify_assign (gpr, t, pre_p);
7586 }
7587
7588 if (needed_sseregs)
7589 {
7590 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7591 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7592 gimplify_assign (fpr, t, pre_p);
7593 }
7594
7595 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7596
7597 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7598 }
7599
7600 /* ... otherwise out of the overflow area. */
7601
7602 /* When the caller aligns a parameter on the stack, a parameter whose
7603 alignment exceeds MAX_SUPPORTED_STACK_ALIGNMENT is only aligned
7604 at MAX_SUPPORTED_STACK_ALIGNMENT. Match that caller behavior here
7605 in the callee. */
7606 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7607 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7608 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7609
7610 /* Care for on-stack alignment if needed. */
7611 if (arg_boundary <= 64 || size == 0)
7612 t = ovf;
7613 else
7614 {
7615 HOST_WIDE_INT align = arg_boundary / 8;
7616 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7617 size_int (align - 1));
7618 t = fold_convert (sizetype, t);
7619 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7620 size_int (-align));
7621 t = fold_convert (TREE_TYPE (ovf), t);
7622 }
7623
7624 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7625 gimplify_assign (addr, t, pre_p);
7626
7627 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7628 size_int (rsize * UNITS_PER_WORD));
7629 gimplify_assign (unshare_expr (ovf), t, pre_p);
7630
7631 if (container)
7632 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7633
7634 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7635 addr = fold_convert (ptrtype, addr);
7636
7637 if (indirect_p)
7638 addr = build_va_arg_indirect_ref (addr);
7639 return build_va_arg_indirect_ref (addr);
7640 }
7641 \f
7642 /* Return true if OPNUM's MEM should be matched
7643 in movabs* patterns. */
7644
7645 bool
7646 ix86_check_movabs (rtx insn, int opnum)
7647 {
7648 rtx set, mem;
7649
7650 set = PATTERN (insn);
7651 if (GET_CODE (set) == PARALLEL)
7652 set = XVECEXP (set, 0, 0);
7653 gcc_assert (GET_CODE (set) == SET);
7654 mem = XEXP (set, opnum);
7655 while (GET_CODE (mem) == SUBREG)
7656 mem = SUBREG_REG (mem);
7657 gcc_assert (MEM_P (mem));
7658 return volatile_ok || !MEM_VOLATILE_P (mem);
7659 }
7660 \f
7661 /* Initialize the table of extra 80387 mathematical constants. */
7662
7663 static void
7664 init_ext_80387_constants (void)
7665 {
7666 static const char * cst[5] =
7667 {
7668 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7669 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7670 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7671 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7672 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7673 };
7674 int i;
7675
7676 for (i = 0; i < 5; i++)
7677 {
7678 real_from_string (&ext_80387_constants_table[i], cst[i]);
7679 /* Ensure each constant is rounded to XFmode precision. */
7680 real_convert (&ext_80387_constants_table[i],
7681 XFmode, &ext_80387_constants_table[i]);
7682 }
7683
7684 ext_80387_constants_init = 1;
7685 }
7686
7687 /* Return non-zero if the constant is something that
7688 can be loaded with a special instruction. */
7689
7690 int
7691 standard_80387_constant_p (rtx x)
7692 {
7693 enum machine_mode mode = GET_MODE (x);
7694
7695 REAL_VALUE_TYPE r;
7696
7697 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7698 return -1;
7699
7700 if (x == CONST0_RTX (mode))
7701 return 1;
7702 if (x == CONST1_RTX (mode))
7703 return 2;
7704
7705 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7706
7707 /* For XFmode constants, try to find a special 80387 instruction when
7708 optimizing for size or on those CPUs that benefit from them. */
7709 if (mode == XFmode
7710 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7711 {
7712 int i;
7713
7714 if (! ext_80387_constants_init)
7715 init_ext_80387_constants ();
7716
7717 for (i = 0; i < 5; i++)
7718 if (real_identical (&r, &ext_80387_constants_table[i]))
7719 return i + 3;
7720 }
7721
7722 /* A load of the constant -0.0 or -1.0 will be split into an
7723 fldz;fchs or fld1;fchs sequence. */
7724 if (real_isnegzero (&r))
7725 return 8;
7726 if (real_identical (&r, &dconstm1))
7727 return 9;
7728
7729 return 0;
7730 }
7731
7732 /* Return the opcode of the special instruction to be used to load
7733 the constant X. */
7734
7735 const char *
7736 standard_80387_constant_opcode (rtx x)
7737 {
7738 switch (standard_80387_constant_p (x))
7739 {
7740 case 1:
7741 return "fldz";
7742 case 2:
7743 return "fld1";
7744 case 3:
7745 return "fldlg2";
7746 case 4:
7747 return "fldln2";
7748 case 5:
7749 return "fldl2e";
7750 case 6:
7751 return "fldl2t";
7752 case 7:
7753 return "fldpi";
7754 case 8:
7755 case 9:
7756 return "#";
7757 default:
7758 gcc_unreachable ();
7759 }
7760 }
7761
7762 /* Return the CONST_DOUBLE representing the 80387 constant that is
7763 loaded by the specified special instruction. The argument IDX
7764 matches the return value from standard_80387_constant_p. */
7765
7766 rtx
7767 standard_80387_constant_rtx (int idx)
7768 {
7769 int i;
7770
7771 if (! ext_80387_constants_init)
7772 init_ext_80387_constants ();
7773
7774 switch (idx)
7775 {
7776 case 3:
7777 case 4:
7778 case 5:
7779 case 6:
7780 case 7:
7781 i = idx - 3;
7782 break;
7783
7784 default:
7785 gcc_unreachable ();
7786 }
7787
7788 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7789 XFmode);
7790 }
7791
7792 /* Return 1 if X is all 0s and 2 if X is all 1s
7793 in a supported SSE vector mode. */
7794
7795 int
7796 standard_sse_constant_p (rtx x)
7797 {
7798 enum machine_mode mode = GET_MODE (x);
7799
7800 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7801 return 1;
7802 if (vector_all_ones_operand (x, mode))
7803 switch (mode)
7804 {
7805 case V16QImode:
7806 case V8HImode:
7807 case V4SImode:
7808 case V2DImode:
7809 if (TARGET_SSE2)
7810 return 2;
7811 default:
7812 break;
7813 }
7814
7815 return 0;
7816 }
7817
7818 /* Return the opcode of the special instruction to be used to load
7819 the constant X. */
7820
7821 const char *
7822 standard_sse_constant_opcode (rtx insn, rtx x)
7823 {
7824 switch (standard_sse_constant_p (x))
7825 {
7826 case 1:
7827 switch (get_attr_mode (insn))
7828 {
7829 case MODE_V4SF:
7830 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7831 case MODE_V2DF:
7832 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7833 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7834 else
7835 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7836 case MODE_TI:
7837 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7838 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7839 else
7840 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7841 case MODE_V8SF:
7842 return "vxorps\t%x0, %x0, %x0";
7843 case MODE_V4DF:
7844 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7845 return "vxorps\t%x0, %x0, %x0";
7846 else
7847 return "vxorpd\t%x0, %x0, %x0";
7848 case MODE_OI:
7849 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7850 return "vxorps\t%x0, %x0, %x0";
7851 else
7852 return "vpxor\t%x0, %x0, %x0";
7853 default:
7854 break;
7855 }
7856 case 2:
7857 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
7858 default:
7859 break;
7860 }
7861 gcc_unreachable ();
7862 }
7863
7864 /* Return true if OP contains a symbol reference. */
7865
7866 bool
7867 symbolic_reference_mentioned_p (rtx op)
7868 {
7869 const char *fmt;
7870 int i;
7871
7872 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7873 return true;
7874
7875 fmt = GET_RTX_FORMAT (GET_CODE (op));
7876 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7877 {
7878 if (fmt[i] == 'E')
7879 {
7880 int j;
7881
7882 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7883 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7884 return true;
7885 }
7886
7887 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7888 return true;
7889 }
7890
7891 return false;
7892 }
7893
7894 /* Return true if it is appropriate to emit `ret' instructions in the
7895 body of a function. Do this only if the epilogue is simple, needing a
7896 couple of insns. Prior to reloading, we can't tell how many registers
7897 must be saved, so return false then. Return false if there is no frame
7898 marker to de-allocate. */
7899
7900 bool
7901 ix86_can_use_return_insn_p (void)
7902 {
7903 struct ix86_frame frame;
7904
7905 if (! reload_completed || frame_pointer_needed)
7906 return 0;
7907
7908 /* Don't allow more than 32k pop, since that's all we can do
7909 with one instruction. */
7910 if (crtl->args.pops_args && crtl->args.size >= 32768)
7911 return 0;
7912
7913 ix86_compute_frame_layout (&frame);
7914 return (frame.stack_pointer_offset == UNITS_PER_WORD
7915 && (frame.nregs + frame.nsseregs) == 0);
7916 }
7917 \f
7918 /* Value should be nonzero if functions must have frame pointers.
7919 Zero means the frame pointer need not be set up (and parms may
7920 be accessed via the stack pointer) in functions that seem suitable. */
7921
7922 static bool
7923 ix86_frame_pointer_required (void)
7924 {
7925 /* If we accessed previous frames, then the generated code expects
7926 to be able to access the saved ebp value in our frame. */
7927 if (cfun->machine->accesses_prev_frame)
7928 return true;
7929
7930 /* Several x86 OSes need a frame pointer for other reasons,
7931 usually pertaining to setjmp. */
7932 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7933 return true;
7934
7935 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
7936 turns off the frame pointer by default. Turn it back on now if
7937 we've not got a leaf function. */
7938 if (TARGET_OMIT_LEAF_FRAME_POINTER
7939 && (!current_function_is_leaf
7940 || ix86_current_function_calls_tls_descriptor))
7941 return true;
7942
7943 if (crtl->profile && !flag_fentry)
7944 return true;
7945
7946 return false;
7947 }
7948
7949 /* Record that the current function accesses previous call frames. */
7950
7951 void
7952 ix86_setup_frame_addresses (void)
7953 {
7954 cfun->machine->accesses_prev_frame = 1;
7955 }
7956 \f
7957 #ifndef USE_HIDDEN_LINKONCE
7958 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7959 # define USE_HIDDEN_LINKONCE 1
7960 # else
7961 # define USE_HIDDEN_LINKONCE 0
7962 # endif
7963 #endif
7964
7965 static int pic_labels_used;
7966
7967 /* Fills in the label name that should be used for a pc thunk for
7968 the given register. */
7969
7970 static void
7971 get_pc_thunk_name (char name[32], unsigned int regno)
7972 {
7973 gcc_assert (!TARGET_64BIT);
7974
7975 if (USE_HIDDEN_LINKONCE)
7976 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7977 else
7978 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7979 }
7980
7981
7982 /* This function generates the pc thunks used for -fpic. Each thunk loads
7983 its register with the return address of the caller and then returns. */
7984
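/* Each emitted thunk has the form (a sketch of the assembly output, shown
   here for %ebx):

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret
*/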
7985 static void
7986 ix86_code_end (void)
7987 {
7988 rtx xops[2];
7989 int regno;
7990
7991 for (regno = AX_REG; regno <= SP_REG; regno++)
7992 {
7993 char name[32];
7994 tree decl;
7995
7996 if (!(pic_labels_used & (1 << regno)))
7997 continue;
7998
7999 get_pc_thunk_name (name, regno);
8000
8001 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8002 get_identifier (name),
8003 build_function_type (void_type_node, void_list_node));
8004 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8005 NULL_TREE, void_type_node);
8006 TREE_PUBLIC (decl) = 1;
8007 TREE_STATIC (decl) = 1;
8008
8009 #if TARGET_MACHO
8010 if (TARGET_MACHO)
8011 {
8012 switch_to_section (darwin_sections[text_coal_section]);
8013 fputs ("\t.weak_definition\t", asm_out_file);
8014 assemble_name (asm_out_file, name);
8015 fputs ("\n\t.private_extern\t", asm_out_file);
8016 assemble_name (asm_out_file, name);
8017 putc ('\n', asm_out_file);
8018 ASM_OUTPUT_LABEL (asm_out_file, name);
8019 DECL_WEAK (decl) = 1;
8020 }
8021 else
8022 #endif
8023 if (USE_HIDDEN_LINKONCE)
8024 {
8025 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8026
8027 targetm.asm_out.unique_section (decl, 0);
8028 switch_to_section (get_named_section (decl, NULL, 0));
8029
8030 targetm.asm_out.globalize_label (asm_out_file, name);
8031 fputs ("\t.hidden\t", asm_out_file);
8032 assemble_name (asm_out_file, name);
8033 putc ('\n', asm_out_file);
8034 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8035 }
8036 else
8037 {
8038 switch_to_section (text_section);
8039 ASM_OUTPUT_LABEL (asm_out_file, name);
8040 }
8041
8042 DECL_INITIAL (decl) = make_node (BLOCK);
8043 current_function_decl = decl;
8044 init_function_start (decl);
8045 first_function_block_is_cold = false;
8046 /* Make sure unwind info is emitted for the thunk if needed. */
8047 final_start_function (emit_barrier (), asm_out_file, 1);
8048
8049 /* Pad stack IP move with 4 instructions (two NOPs count
8050 as one instruction). */
8051 if (TARGET_PAD_SHORT_FUNCTION)
8052 {
8053 int i = 8;
8054
8055 while (i--)
8056 fputs ("\tnop\n", asm_out_file);
8057 }
8058
8059 xops[0] = gen_rtx_REG (Pmode, regno);
8060 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8061 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8062 fputs ("\tret\n", asm_out_file);
8063 final_end_function ();
8064 init_insn_lengths ();
8065 free_after_compilation (cfun);
8066 set_cfun (NULL);
8067 current_function_decl = NULL;
8068 }
8069
8070 if (flag_split_stack)
8071 file_end_indicate_split_stack ();
8072 }
8073
8074 /* Emit code for the SET_GOT patterns. */
8075
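/* With flag_pic and without TARGET_DEEP_BRANCH_PREDICTION this emits,
   roughly (an AT&T-syntax sketch, shown for %ebx):

       call    1f
   1:  popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx

   With TARGET_DEEP_BRANCH_PREDICTION the call targets the pc thunk
   emitted by ix86_code_end and the label adjustment is omitted from
   the add. */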
8076 const char *
8077 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8078 {
8079 rtx xops[3];
8080
8081 xops[0] = dest;
8082
8083 if (TARGET_VXWORKS_RTP && flag_pic)
8084 {
8085 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8086 xops[2] = gen_rtx_MEM (Pmode,
8087 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8088 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8089
8090 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8091 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8092 an unadorned address. */
8093 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8094 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8095 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8096 return "";
8097 }
8098
8099 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8100
8101 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8102 {
8103 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8104
8105 if (!flag_pic)
8106 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8107 else
8108 {
8109 output_asm_insn ("call\t%a2", xops);
8110 #ifdef DWARF2_UNWIND_INFO
8111 /* The call to the next label acts as a push. */
8112 if (dwarf2out_do_frame ())
8113 {
8114 rtx insn;
8115 start_sequence ();
8116 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8117 gen_rtx_PLUS (Pmode,
8118 stack_pointer_rtx,
8119 GEN_INT (-4))));
8120 RTX_FRAME_RELATED_P (insn) = 1;
8121 dwarf2out_frame_debug (insn, true);
8122 end_sequence ();
8123 }
8124 #endif
8125 }
8126
8127 #if TARGET_MACHO
8128 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8129 is what will be referenced by the Mach-O PIC subsystem. */
8130 if (!label)
8131 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8132 #endif
8133
8134 targetm.asm_out.internal_label (asm_out_file, "L",
8135 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8136
8137 if (flag_pic)
8138 {
8139 output_asm_insn ("pop%z0\t%0", xops);
8140 #ifdef DWARF2_UNWIND_INFO
8141 /* The pop is a pop and clobbers dest, but doesn't restore it
8142 for unwind info purposes. */
8143 if (dwarf2out_do_frame ())
8144 {
8145 rtx insn;
8146 start_sequence ();
8147 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8148 dwarf2out_frame_debug (insn, true);
8149 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8150 gen_rtx_PLUS (Pmode,
8151 stack_pointer_rtx,
8152 GEN_INT (4))));
8153 RTX_FRAME_RELATED_P (insn) = 1;
8154 dwarf2out_frame_debug (insn, true);
8155 end_sequence ();
8156 }
8157 #endif
8158 }
8159 }
8160 else
8161 {
8162 char name[32];
8163 get_pc_thunk_name (name, REGNO (dest));
8164 pic_labels_used |= 1 << REGNO (dest);
8165
8166 #ifdef DWARF2_UNWIND_INFO
8167 /* Ensure all queued register saves are flushed before the
8168 call. */
8169 if (dwarf2out_do_frame ())
8170 dwarf2out_flush_queued_reg_saves ();
8171 #endif
8172 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8173 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8174 output_asm_insn ("call\t%X2", xops);
8175 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8176 is what will be referenced by the Mach-O PIC subsystem. */
8177 #if TARGET_MACHO
8178 if (!label)
8179 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8180 else
8181 targetm.asm_out.internal_label (asm_out_file, "L",
8182 CODE_LABEL_NUMBER (label));
8183 #endif
8184 }
8185
8186 if (TARGET_MACHO)
8187 return "";
8188
8189 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8190 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8191 else
8192 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
8193
8194 return "";
8195 }
8196
8197 /* Generate a "push" pattern for input ARG. */
8198
8199 static rtx
8200 gen_push (rtx arg)
8201 {
8202 struct machine_function *m = cfun->machine;
8203
8204 if (m->fs.cfa_reg == stack_pointer_rtx)
8205 m->fs.cfa_offset += UNITS_PER_WORD;
8206 m->fs.sp_offset += UNITS_PER_WORD;
8207
8208 return gen_rtx_SET (VOIDmode,
8209 gen_rtx_MEM (Pmode,
8210 gen_rtx_PRE_DEC (Pmode,
8211 stack_pointer_rtx)),
8212 arg);
8213 }
8214
8215 /* Generate a "pop" pattern for input ARG. */
8216
8217 static rtx
8218 gen_pop (rtx arg)
8219 {
8220 return gen_rtx_SET (VOIDmode,
8221 arg,
8222 gen_rtx_MEM (Pmode,
8223 gen_rtx_POST_INC (Pmode,
8224 stack_pointer_rtx)));
8225 }
8226
8227 /* Return the number of an unused call-clobbered register that is available
8228 for the entire function, or INVALID_REGNUM if there is none. */
8229
8230 static unsigned int
8231 ix86_select_alt_pic_regnum (void)
8232 {
8233 if (current_function_is_leaf
8234 && !crtl->profile
8235 && !ix86_current_function_calls_tls_descriptor)
8236 {
8237 int i, drap;
8238 /* Can't use the same register for both PIC and DRAP. */
8239 if (crtl->drap_reg)
8240 drap = REGNO (crtl->drap_reg);
8241 else
8242 drap = -1;
8243 for (i = 2; i >= 0; --i)
8244 if (i != drap && !df_regs_ever_live_p (i))
8245 return i;
8246 }
8247
8248 return INVALID_REGNUM;
8249 }
8250
8251 /* Return 1 if we need to save REGNO. */
8252 static int
8253 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8254 {
8255 if (pic_offset_table_rtx
8256 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8257 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8258 || crtl->profile
8259 || crtl->calls_eh_return
8260 || crtl->uses_const_pool))
8261 {
8262 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8263 return 0;
8264 return 1;
8265 }
8266
8267 if (crtl->calls_eh_return && maybe_eh_return)
8268 {
8269 unsigned i;
8270 for (i = 0; ; i++)
8271 {
8272 unsigned test = EH_RETURN_DATA_REGNO (i);
8273 if (test == INVALID_REGNUM)
8274 break;
8275 if (test == regno)
8276 return 1;
8277 }
8278 }
8279
8280 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8281 return 1;
8282
8283 return (df_regs_ever_live_p (regno)
8284 && !call_used_regs[regno]
8285 && !fixed_regs[regno]
8286 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8287 }
8288
8289 /* Return the number of saved general purpose registers. */
8290
8291 static int
8292 ix86_nsaved_regs (void)
8293 {
8294 int nregs = 0;
8295 int regno;
8296
8297 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8298 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8299 nregs ++;
8300 return nregs;
8301 }
8302
8303 /* Return the number of saved SSE registers. */
8304
8305 static int
8306 ix86_nsaved_sseregs (void)
8307 {
8308 int nregs = 0;
8309 int regno;
8310
8311 if (ix86_cfun_abi () != MS_ABI)
8312 return 0;
8313 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8314 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8315 nregs ++;
8316 return nregs;
8317 }
8318
8319 /* Given FROM and TO register numbers, say whether this elimination is
8320 allowed. If stack alignment is needed, we can only replace argument
8321 pointer with hard frame pointer, or replace frame pointer with stack
8322 pointer. Otherwise, frame pointer elimination is automatically
8323 handled and all other eliminations are valid. */
8324
8325 static bool
8326 ix86_can_eliminate (const int from, const int to)
8327 {
8328 if (stack_realign_fp)
8329 return ((from == ARG_POINTER_REGNUM
8330 && to == HARD_FRAME_POINTER_REGNUM)
8331 || (from == FRAME_POINTER_REGNUM
8332 && to == STACK_POINTER_REGNUM));
8333 else
8334 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8335 }
8336
8337 /* Return the offset between two registers, one to be eliminated, and the other
8338 its replacement, at the start of a routine. */
8339
8340 HOST_WIDE_INT
8341 ix86_initial_elimination_offset (int from, int to)
8342 {
8343 struct ix86_frame frame;
8344 ix86_compute_frame_layout (&frame);
8345
8346 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8347 return frame.hard_frame_pointer_offset;
8348 else if (from == FRAME_POINTER_REGNUM
8349 && to == HARD_FRAME_POINTER_REGNUM)
8350 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8351 else
8352 {
8353 gcc_assert (to == STACK_POINTER_REGNUM);
8354
8355 if (from == ARG_POINTER_REGNUM)
8356 return frame.stack_pointer_offset;
8357
8358 gcc_assert (from == FRAME_POINTER_REGNUM);
8359 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8360 }
8361 }
8362
8363 /* In a dynamically-aligned function, we can't know the offset from
8364 stack pointer to frame pointer, so we must ensure that setjmp
8365 eliminates fp against the hard fp (%ebp) rather than trying to
8366 index from %esp up to the top of the frame across a gap that is
8367 of unknown (at compile-time) size. */
8368 static rtx
8369 ix86_builtin_setjmp_frame_value (void)
8370 {
8371 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8372 }
8373
8374 /* On the x86 -fsplit-stack and -fstack-protector both use the same
8375 field in the TCB, so they cannot be used together. */
8376
8377 static bool
8378 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED)
8379 {
8380 bool ret = true;
8381
8382 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
8383 if (report)
8384 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
8385 ret = false;
8386 #else
8387 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
8388 {
8389 if (report)
8390 error ("%<-fsplit-stack%> requires "
8391 "assembler support for CFI directives");
8392 ret = false;
8393 }
8394 #endif
8395
8396 return ret;
8397 }
8398
8399 /* When using -fsplit-stack, the allocation routines set a field in
8400 the TCB to the bottom of the stack plus this much space, measured
8401 in bytes. */
8402
8403 #define SPLIT_STACK_AVAILABLE 256
8404
8405 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
8406
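/* A rough sketch of the layout computed below, going from the CFA towards
   lower addresses; the named offsets are the fields recorded in
   struct ix86_frame:

       return address
       [pushed static chain]
       [saved frame pointer]
     hard_frame_pointer_offset ->
       GP register save area
     reg_save_offset ->
       [16-byte aligned SSE register save area]
     sse_reg_save_offset ->
       va_arg register save area, alignment padding
     frame_pointer_offset ->
       local variables (get_frame_size ())
       outgoing arguments area, alignment padding
     stack_pointer_offset ->
       (reduced by red_zone_size when the red zone is usable)  */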
8407 static void
8408 ix86_compute_frame_layout (struct ix86_frame *frame)
8409 {
8410 unsigned int stack_alignment_needed;
8411 HOST_WIDE_INT offset;
8412 unsigned int preferred_alignment;
8413 HOST_WIDE_INT size = get_frame_size ();
8414 HOST_WIDE_INT to_allocate;
8415
8416 frame->nregs = ix86_nsaved_regs ();
8417 frame->nsseregs = ix86_nsaved_sseregs ();
8418
8419 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8420 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8421
8422 /* The MS ABI seems to require stack alignment to always be 16, except in
8423 function prologues and in leaf functions. */
8424 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8425 && (!current_function_is_leaf || cfun->calls_alloca != 0
8426 || ix86_current_function_calls_tls_descriptor))
8427 {
8428 preferred_alignment = 16;
8429 stack_alignment_needed = 16;
8430 crtl->preferred_stack_boundary = 128;
8431 crtl->stack_alignment_needed = 128;
8432 }
8433
8434 gcc_assert (!size || stack_alignment_needed);
8435 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8436 gcc_assert (preferred_alignment <= stack_alignment_needed);
8437
8438 /* During reload iterations the number of registers saved can change.
8439 Recompute the value as needed. Do not recompute when the number of registers
8440 didn't change, as reload does multiple calls to the function and does not
8441 expect the decision to change within a single iteration. */
8442 if (!optimize_function_for_size_p (cfun)
8443 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8444 {
8445 int count = frame->nregs;
8446 struct cgraph_node *node = cgraph_node (current_function_decl);
8447
8448 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8449 /* The fast prologue uses move instead of push to save registers. This
8450 is significantly longer, but also executes faster as modern hardware
8451 can execute the moves in parallel, but can't do that for push/pop.
8452
8453 Be careful about choosing which prologue to emit: when the function takes
8454 many instructions to execute, we may as well use the slow version, and the
8455 same holds when the function is known to be outside a hot spot (this is
8456 known with feedback only). Weight the size of the function by the number
8457 of registers to save, as it is cheap to use one or two push instructions
8458 but very slow to use many of them. */
8459 if (count)
8460 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8461 if (node->frequency < NODE_FREQUENCY_NORMAL
8462 || (flag_branch_probabilities
8463 && node->frequency < NODE_FREQUENCY_HOT))
8464 cfun->machine->use_fast_prologue_epilogue = false;
8465 else
8466 cfun->machine->use_fast_prologue_epilogue
8467 = !expensive_function_p (count);
8468 }
8469 if (TARGET_PROLOGUE_USING_MOVE
8470 && cfun->machine->use_fast_prologue_epilogue)
8471 frame->save_regs_using_mov = true;
8472 else
8473 frame->save_regs_using_mov = false;
8474
8475 /* If static stack checking is enabled and done with probes, the registers
8476 need to be saved before allocating the frame. */
8477 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8478 frame->save_regs_using_mov = false;
8479
8480 /* Skip return address. */
8481 offset = UNITS_PER_WORD;
8482
8483 /* Skip pushed static chain. */
8484 if (ix86_static_chain_on_stack)
8485 offset += UNITS_PER_WORD;
8486
8487 /* Skip saved base pointer. */
8488 if (frame_pointer_needed)
8489 offset += UNITS_PER_WORD;
8490
8491 frame->hard_frame_pointer_offset = offset;
8492
8493 /* Register save area */
8494 offset += frame->nregs * UNITS_PER_WORD;
8495 frame->reg_save_offset = offset;
8496
8497 /* Align and set SSE register save area. */
8498 if (frame->nsseregs)
8499 {
8500 /* The only ABI that has saved SSE registers (Win64) also has a
8501 16-byte aligned default stack, and thus we don't need to be
8502 within the re-aligned local stack frame to save them. */
8503 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8504 offset = (offset + 16 - 1) & -16;
8505 offset += frame->nsseregs * 16;
8506 }
8507 frame->sse_reg_save_offset = offset;
8508
8509 /* The re-aligned stack starts here. Values before this point are not
8510 directly comparable with values below this point. In order to make
8511 sure that no value happens to be the same before and after, force
8512 the alignment computation below to add a non-zero value. */
8513 if (stack_realign_fp)
8514 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8515
8516 /* Va-arg area */
8517 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8518 offset += frame->va_arg_size;
8519
8520 /* Align start of frame for local function. */
8521 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8522
8523 /* Frame pointer points here. */
8524 frame->frame_pointer_offset = offset;
8525
8526 offset += size;
8527
8528 /* Add the outgoing arguments area. It can be skipped if we eliminated
8529 all the function calls as dead code.
8530 Skipping is however impossible when the function calls alloca, since
8531 the alloca expander assumes that the last crtl->outgoing_args_size
8532 bytes of the stack frame are unused. */
8533 if (ACCUMULATE_OUTGOING_ARGS
8534 && (!current_function_is_leaf || cfun->calls_alloca
8535 || ix86_current_function_calls_tls_descriptor))
8536 {
8537 offset += crtl->outgoing_args_size;
8538 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8539 }
8540 else
8541 frame->outgoing_arguments_size = 0;
8542
8543 /* Align stack boundary. Only needed if we're calling another function
8544 or using alloca. */
8545 if (!current_function_is_leaf || cfun->calls_alloca
8546 || ix86_current_function_calls_tls_descriptor)
8547 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8548
8549 /* We've reached end of stack frame. */
8550 frame->stack_pointer_offset = offset;
8551
8552 /* Size prologue needs to allocate. */
8553 to_allocate = offset - frame->sse_reg_save_offset;
8554
8555 if ((!to_allocate && frame->nregs <= 1)
8556 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8557 frame->save_regs_using_mov = false;
8558
8559 if (ix86_using_red_zone ()
8560 && current_function_sp_is_unchanging
8561 && current_function_is_leaf
8562 && !ix86_current_function_calls_tls_descriptor)
8563 {
8564 frame->red_zone_size = to_allocate;
8565 if (frame->save_regs_using_mov)
8566 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8567 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8568 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8569 }
8570 else
8571 frame->red_zone_size = 0;
8572 frame->stack_pointer_offset -= frame->red_zone_size;
8573 }
8574
8575 /* This is semi-inlined memory_address_length, but simplified
8576 since we know that we're always dealing with reg+offset, and
8577 to avoid having to create and discard all that rtl. */
8578
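/* For example, the byte counts returned are: 0(%eax) needs no displacement,
   so 0; 0(%ebp) needs a disp8, so 1; 8(%esp) needs a SIB byte plus a disp8,
   so 2; and 1024(%eax) needs a disp32, so 4. */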
8579 static inline int
8580 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8581 {
8582 int len = 4;
8583
8584 if (offset == 0)
8585 {
8586 /* EBP and R13 cannot be encoded without an offset. */
8587 len = (regno == BP_REG || regno == R13_REG);
8588 }
8589 else if (IN_RANGE (offset, -128, 127))
8590 len = 1;
8591
8592 /* ESP and R12 must be encoded with a SIB byte. */
8593 if (regno == SP_REG || regno == R12_REG)
8594 len++;
8595
8596 return len;
8597 }
8598
8599 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8600 The valid base registers are taken from CFUN->MACHINE->FS. */
8601
8602 static rtx
8603 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8604 {
8605 const struct machine_function *m = cfun->machine;
8606 rtx base_reg = NULL;
8607 HOST_WIDE_INT base_offset = 0;
8608
8609 if (m->use_fast_prologue_epilogue)
8610 {
8611 /* Choose the base register most likely to allow the most scheduling
8612 opportunities. Generally FP is valid throughout the function,
8613 while DRAP must be reloaded within the epilogue. But choose either
8614 over the SP due to increased encoding size. */
8615
8616 if (m->fs.fp_valid)
8617 {
8618 base_reg = hard_frame_pointer_rtx;
8619 base_offset = m->fs.fp_offset - cfa_offset;
8620 }
8621 else if (m->fs.drap_valid)
8622 {
8623 base_reg = crtl->drap_reg;
8624 base_offset = 0 - cfa_offset;
8625 }
8626 else if (m->fs.sp_valid)
8627 {
8628 base_reg = stack_pointer_rtx;
8629 base_offset = m->fs.sp_offset - cfa_offset;
8630 }
8631 }
8632 else
8633 {
8634 HOST_WIDE_INT toffset;
8635 int len = 16, tlen;
8636
8637 /* Choose the base register with the smallest address encoding.
8638 With a tie, choose FP > DRAP > SP. */
8639 if (m->fs.sp_valid)
8640 {
8641 base_reg = stack_pointer_rtx;
8642 base_offset = m->fs.sp_offset - cfa_offset;
8643 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8644 }
8645 if (m->fs.drap_valid)
8646 {
8647 toffset = 0 - cfa_offset;
8648 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8649 if (tlen <= len)
8650 {
8651 base_reg = crtl->drap_reg;
8652 base_offset = toffset;
8653 len = tlen;
8654 }
8655 }
8656 if (m->fs.fp_valid)
8657 {
8658 toffset = m->fs.fp_offset - cfa_offset;
8659 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8660 if (tlen <= len)
8661 {
8662 base_reg = hard_frame_pointer_rtx;
8663 base_offset = toffset;
8664 len = tlen;
8665 }
8666 }
8667 }
8668 gcc_assert (base_reg != NULL);
8669
8670 return plus_constant (base_reg, base_offset);
8671 }
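/* For example (values illustrative only): with use_fast_prologue_epilogue
   and m->fs.fp_valid set, m->fs.fp_offset == 16 and CFA_OFFSET == 8, the
   code above returns hard_frame_pointer_rtx + 8, i.e. the slot 8 bytes
   below the CFA addressed through the frame pointer.  */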
8672
8673 /* Emit code to save registers in the prologue. */
8674
8675 static void
8676 ix86_emit_save_regs (void)
8677 {
8678 unsigned int regno;
8679 rtx insn;
8680
8681 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8682 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8683 {
8684 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8685 RTX_FRAME_RELATED_P (insn) = 1;
8686 }
8687 }
8688
8689 /* Emit a single register save at CFA - CFA_OFFSET. */
8690
8691 static void
8692 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8693 HOST_WIDE_INT cfa_offset)
8694 {
8695 struct machine_function *m = cfun->machine;
8696 rtx reg = gen_rtx_REG (mode, regno);
8697 rtx mem, addr, base, insn;
8698
8699 addr = choose_baseaddr (cfa_offset);
8700 mem = gen_frame_mem (mode, addr);
8701
8702 /* For SSE saves, we need to indicate the 128-bit alignment. */
8703 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8704
8705 insn = emit_move_insn (mem, reg);
8706 RTX_FRAME_RELATED_P (insn) = 1;
8707
8708 base = addr;
8709 if (GET_CODE (base) == PLUS)
8710 base = XEXP (base, 0);
8711 gcc_checking_assert (REG_P (base));
8712
8713 /* When saving registers into a re-aligned local stack frame, avoid
8714 any tricky guessing by dwarf2out. */
8715 if (m->fs.realigned)
8716 {
8717 gcc_checking_assert (stack_realign_drap);
8718
8719 if (regno == REGNO (crtl->drap_reg))
8720 {
8721 /* A bit of a hack. We force the DRAP register to be saved in
8722 the re-aligned stack frame, which provides us with a copy
8723 of the CFA that will last past the prologue. Install it. */
8724 gcc_checking_assert (cfun->machine->fs.fp_valid);
8725 addr = plus_constant (hard_frame_pointer_rtx,
8726 cfun->machine->fs.fp_offset - cfa_offset);
8727 mem = gen_rtx_MEM (mode, addr);
8728 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
8729 }
8730 else
8731 {
8732 /* The frame pointer is a stable reference within the
8733 aligned frame. Use it. */
8734 gcc_checking_assert (cfun->machine->fs.fp_valid);
8735 addr = plus_constant (hard_frame_pointer_rtx,
8736 cfun->machine->fs.fp_offset - cfa_offset);
8737 mem = gen_rtx_MEM (mode, addr);
8738 add_reg_note (insn, REG_CFA_EXPRESSION,
8739 gen_rtx_SET (VOIDmode, mem, reg));
8740 }
8741 }
8742
8743 /* The memory may not be relative to the current CFA register,
8744 which means that we may need to generate a new pattern for
8745 use by the unwind info. */
8746 else if (base != m->fs.cfa_reg)
8747 {
8748 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
8749 mem = gen_rtx_MEM (mode, addr);
8750 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
8751 }
8752 }
8753
8754 /* Emit code to save registers using MOV insns.
8755 First register is stored at CFA - CFA_OFFSET. */
8756 static void
8757 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
8758 {
8759 unsigned int regno;
8760
8761 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8762 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8763 {
8764 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
8765 cfa_offset -= UNITS_PER_WORD;
8766 }
8767 }
8768
8769 /* Emit code to save SSE registers using MOV insns.
8770 First register is stored at CFA - CFA_OFFSET. */
8771 static void
8772 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
8773 {
8774 unsigned int regno;
8775
8776 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8777 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8778 {
8779 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
8780 cfa_offset -= 16;
8781 }
8782 }
8783
8784 static GTY(()) rtx queued_cfa_restores;
8785
8786 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
8787    manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
8788    Don't add the note if the previously saved value will be left untouched
8789    within the stack red zone until return, as unwinders can find the same value
8790    in the register and on the stack.  */
8791
8792 static void
8793 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
8794 {
8795 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
8796 return;
8797
8798 if (insn)
8799 {
8800 add_reg_note (insn, REG_CFA_RESTORE, reg);
8801 RTX_FRAME_RELATED_P (insn) = 1;
8802 }
8803 else
8804 queued_cfa_restores
8805 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8806 }
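/* For instance (64-bit, illustrative values): once the epilogue has set
   fs.red_zone_offset to RED_ZONE_SIZE + UNITS_PER_WORD == 136, no
   REG_CFA_RESTORE note is emitted above for a register saved at CFA - 16,
   since that slot stays within the red zone until the function returns.  */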
8807
8808 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8809
8810 static void
8811 ix86_add_queued_cfa_restore_notes (rtx insn)
8812 {
8813 rtx last;
8814 if (!queued_cfa_restores)
8815 return;
8816 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8817 ;
8818 XEXP (last, 1) = REG_NOTES (insn);
8819 REG_NOTES (insn) = queued_cfa_restores;
8820 queued_cfa_restores = NULL_RTX;
8821 RTX_FRAME_RELATED_P (insn) = 1;
8822 }
8823
8824 /* Expand prologue or epilogue stack adjustment.
8825    The pattern exists to put a dependency on all ebp-based memory accesses.
8826 STYLE should be negative if instructions should be marked as frame related,
8827 zero if %r11 register is live and cannot be freely used and positive
8828 otherwise. */
8829
8830 static void
8831 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8832 int style, bool set_cfa)
8833 {
8834 struct machine_function *m = cfun->machine;
8835 rtx insn;
8836
8837 if (! TARGET_64BIT)
8838 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
8839 else if (x86_64_immediate_operand (offset, DImode))
8840 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
8841 else
8842 {
8843 rtx tmp;
8844 /* r11 is used by indirect sibcall return as well, set before the
8845 epilogue and used after the epilogue. */
8846 if (style)
8847 tmp = gen_rtx_REG (DImode, R11_REG);
8848 else
8849 {
8850 gcc_assert (src != hard_frame_pointer_rtx
8851 && dest != hard_frame_pointer_rtx);
8852 tmp = hard_frame_pointer_rtx;
8853 }
8854 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8855 if (style < 0)
8856 RTX_FRAME_RELATED_P (insn) = 1;
8857
8858 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
8859 }
8860
8861 insn = emit_insn (insn);
8862 if (style >= 0)
8863 ix86_add_queued_cfa_restore_notes (insn);
8864
8865 if (set_cfa)
8866 {
8867 rtx r;
8868
8869 gcc_assert (m->fs.cfa_reg == src);
8870 m->fs.cfa_offset += INTVAL (offset);
8871 m->fs.cfa_reg = dest;
8872
8873 r = gen_rtx_PLUS (Pmode, src, offset);
8874 r = gen_rtx_SET (VOIDmode, dest, r);
8875 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8876 RTX_FRAME_RELATED_P (insn) = 1;
8877 }
8878 else if (style < 0)
8879 RTX_FRAME_RELATED_P (insn) = 1;
8880
8881 if (dest == stack_pointer_rtx)
8882 {
8883 HOST_WIDE_INT ooffset = m->fs.sp_offset;
8884 bool valid = m->fs.sp_valid;
8885
8886 if (src == hard_frame_pointer_rtx)
8887 {
8888 valid = m->fs.fp_valid;
8889 ooffset = m->fs.fp_offset;
8890 }
8891 else if (src == crtl->drap_reg)
8892 {
8893 valid = m->fs.drap_valid;
8894 ooffset = 0;
8895 }
8896 else
8897 {
8898 	  /* Else there are two possibilities: SP itself, which we set
8899 	     up as the default above, or EH_RETURN_STACKADJ_RTX, which is
8900 	     taken care of by hand along the eh_return path.  */
8901 gcc_checking_assert (src == stack_pointer_rtx
8902 || offset == const0_rtx);
8903 }
8904
8905 m->fs.sp_offset = ooffset - INTVAL (offset);
8906 m->fs.sp_valid = valid;
8907 }
8908 }
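/* A typical (illustrative) use of the above is the frame allocation made
   by ix86_expand_prologue further below:

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-allocate), -1,
				m->fs.cfa_reg == stack_pointer_rtx);

   i.e. subtract ALLOCATE from the stack pointer, mark the insns as frame
   related (STYLE == -1), and update the CFA tracking while the stack
   pointer is still the CFA register.  */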
8909
8910 /* Find an available register to be used as dynamic realign argument
8911    pointer register.  Such a register will be written in the prologue and
8912    used at the beginning of the function body, so it must not be
8913 	1. a parameter passing register, or
8914 	2. the GOT pointer.
8915    We reuse the static-chain register if it is available.  Otherwise, we
8916    use DI for i386 and R13 for x86-64.  We chose R13 since it has a
8917    shorter encoding.
8918
8919 Return: the regno of chosen register. */
8920
8921 static unsigned int
8922 find_drap_reg (void)
8923 {
8924 tree decl = cfun->decl;
8925
8926 if (TARGET_64BIT)
8927 {
8928       /* Use R13 for a nested function or a function that needs a static chain.
8929 	 Since a function with a tail call may use any caller-saved
8930 	 register in the epilogue, DRAP must not use a caller-saved
8931 	 register in such a case.  */
8932 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8933 return R13_REG;
8934
8935 return R10_REG;
8936 }
8937 else
8938 {
8939       /* Use DI for a nested function or a function that needs a static chain.
8940 	 Since a function with a tail call may use any caller-saved
8941 	 register in the epilogue, DRAP must not use a caller-saved
8942 	 register in such a case.  */
8943 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8944 return DI_REG;
8945
8946 /* Reuse static chain register if it isn't used for parameter
8947 passing. */
8948 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8949 && !lookup_attribute ("fastcall",
8950 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8951 && !lookup_attribute ("thiscall",
8952 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8953 return CX_REG;
8954 else
8955 return DI_REG;
8956 }
8957 }
8958
8959 /* Return minimum incoming stack alignment. */
8960
8961 static unsigned int
8962 ix86_minimum_incoming_stack_boundary (bool sibcall)
8963 {
8964 unsigned int incoming_stack_boundary;
8965
8966 /* Prefer the one specified at command line. */
8967 if (ix86_user_incoming_stack_boundary)
8968 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8969   /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
8970      if -mstackrealign is used, this isn't a sibcall check, and the
8971      estimated stack alignment is 128 bits.  */
8972 else if (!sibcall
8973 && !TARGET_64BIT
8974 && ix86_force_align_arg_pointer
8975 && crtl->stack_alignment_estimated == 128)
8976 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8977 else
8978 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8979
8980 /* Incoming stack alignment can be changed on individual functions
8981 via force_align_arg_pointer attribute. We use the smallest
8982 incoming stack boundary. */
8983 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8984 && lookup_attribute (ix86_force_align_arg_pointer_string,
8985 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8986 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8987
8988 /* The incoming stack frame has to be aligned at least at
8989 parm_stack_boundary. */
8990 if (incoming_stack_boundary < crtl->parm_stack_boundary)
8991 incoming_stack_boundary = crtl->parm_stack_boundary;
8992
8993   /* The stack at the entry to main is aligned by the runtime.  We use the
8994      smallest incoming stack boundary.  */
8995 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8996 && DECL_NAME (current_function_decl)
8997 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8998 && DECL_FILE_SCOPE_P (current_function_decl))
8999 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9000
9001 return incoming_stack_boundary;
9002 }
9003
9004 /* Update incoming stack boundary and estimated stack alignment. */
9005
9006 static void
9007 ix86_update_stack_boundary (void)
9008 {
9009 ix86_incoming_stack_boundary
9010 = ix86_minimum_incoming_stack_boundary (false);
9011
9012   /* An x86_64 varargs function needs 16-byte stack alignment for the
9013      register save area.  */
9014 if (TARGET_64BIT
9015 && cfun->stdarg
9016 && crtl->stack_alignment_estimated < 128)
9017 crtl->stack_alignment_estimated = 128;
9018 }
9019
9020 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9021 needed or an rtx for DRAP otherwise. */
9022
9023 static rtx
9024 ix86_get_drap_rtx (void)
9025 {
9026 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9027 crtl->need_drap = true;
9028
9029 if (stack_realign_drap)
9030 {
9031       /* Assign DRAP to vDRAP and return vDRAP.  */
9032 unsigned int regno = find_drap_reg ();
9033 rtx drap_vreg;
9034 rtx arg_ptr;
9035 rtx seq, insn;
9036
9037 arg_ptr = gen_rtx_REG (Pmode, regno);
9038 crtl->drap_reg = arg_ptr;
9039
9040 start_sequence ();
9041 drap_vreg = copy_to_reg (arg_ptr);
9042 seq = get_insns ();
9043 end_sequence ();
9044
9045 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9046 if (!optimize)
9047 {
9048 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9049 RTX_FRAME_RELATED_P (insn) = 1;
9050 }
9051 return drap_vreg;
9052 }
9053 else
9054 return NULL;
9055 }
9056
9057 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9058
9059 static rtx
9060 ix86_internal_arg_pointer (void)
9061 {
9062 return virtual_incoming_args_rtx;
9063 }
9064
9065 struct scratch_reg {
9066 rtx reg;
9067 bool saved;
9068 };
9069
9070 /* Return a short-lived scratch register for use on function entry.
9071 In 32-bit mode, it is valid only after the registers are saved
9072 in the prologue. This register must be released by means of
9073 release_scratch_register_on_entry once it is dead. */
9074
9075 static void
9076 get_scratch_register_on_entry (struct scratch_reg *sr)
9077 {
9078 int regno;
9079
9080 sr->saved = false;
9081
9082 if (TARGET_64BIT)
9083 {
9084 /* We always use R11 in 64-bit mode. */
9085 regno = R11_REG;
9086 }
9087 else
9088 {
9089 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9090 bool fastcall_p
9091 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9092 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9093 int regparm = ix86_function_regparm (fntype, decl);
9094 int drap_regno
9095 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9096
9097 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9098 for the static chain register. */
9099 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9100 && drap_regno != AX_REG)
9101 regno = AX_REG;
9102 else if (regparm < 2 && drap_regno != DX_REG)
9103 regno = DX_REG;
9104 /* ecx is the static chain register. */
9105 else if (regparm < 3 && !fastcall_p && !static_chain_p
9106 && drap_regno != CX_REG)
9107 regno = CX_REG;
9108 else if (ix86_save_reg (BX_REG, true))
9109 regno = BX_REG;
9110 /* esi is the static chain register. */
9111 else if (!(regparm == 3 && static_chain_p)
9112 && ix86_save_reg (SI_REG, true))
9113 regno = SI_REG;
9114 else if (ix86_save_reg (DI_REG, true))
9115 regno = DI_REG;
9116 else
9117 {
9118 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9119 sr->saved = true;
9120 }
9121 }
9122
9123 sr->reg = gen_rtx_REG (Pmode, regno);
9124 if (sr->saved)
9125 {
9126 rtx insn = emit_insn (gen_push (sr->reg));
9127 RTX_FRAME_RELATED_P (insn) = 1;
9128 }
9129 }
9130
9131 /* Release a scratch register obtained from the preceding function. */
9132
9133 static void
9134 release_scratch_register_on_entry (struct scratch_reg *sr)
9135 {
9136 if (sr->saved)
9137 {
9138 rtx x, insn = emit_insn (gen_pop (sr->reg));
9139
9140 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9141 RTX_FRAME_RELATED_P (insn) = 1;
9142 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9143 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9144 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9145 }
9146 }
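/* The two routines above are used as a pair; a minimal sketch of the
   intended usage, mirroring the stack-probing code below:

     struct scratch_reg sr;

     get_scratch_register_on_entry (&sr);
     ... use sr.reg as a temporary ...
     release_scratch_register_on_entry (&sr);  */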
9147
9148 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9149
9150 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9151
9152 static void
9153 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9154 {
9155 /* We skip the probe for the first interval + a small dope of 4 words and
9156 probe that many bytes past the specified size to maintain a protection
9157      area at the bottom of the stack.  */
9158 const int dope = 4 * UNITS_PER_WORD;
9159 rtx size_rtx = GEN_INT (size);
9160
9161 /* See if we have a constant small number of probes to generate. If so,
9162 that's the easy case. The run-time loop is made up of 11 insns in the
9163 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9164 for n # of intervals. */
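  /* As a worked example (assuming the default PROBE_INTERVAL of 4096 bytes
     and UNITS_PER_WORD == 8, so dope == 32): for SIZE == 10000 the unrolled
     case below emits
	 SP -= 8224  (2 * PROBE_INTERVAL + dope), probe
	 SP -= 4096,                              probe
	 SP -= 1808  (SIZE + PROBE_INTERVAL - 12288), probe
	 SP += 4128  (PROBE_INTERVAL + dope)
     for a net adjustment of exactly -SIZE.  */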
9165 if (size <= 5 * PROBE_INTERVAL)
9166 {
9167 HOST_WIDE_INT i, adjust;
9168 bool first_probe = true;
9169
9170 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9171 values of N from 1 until it exceeds SIZE. If only one probe is
9172 needed, this will not generate any code. Then adjust and probe
9173 to PROBE_INTERVAL + SIZE. */
9174 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9175 {
9176 if (first_probe)
9177 {
9178 adjust = 2 * PROBE_INTERVAL + dope;
9179 first_probe = false;
9180 }
9181 else
9182 adjust = PROBE_INTERVAL;
9183
9184 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9185 plus_constant (stack_pointer_rtx, -adjust)));
9186 emit_stack_probe (stack_pointer_rtx);
9187 }
9188
9189 if (first_probe)
9190 adjust = size + PROBE_INTERVAL + dope;
9191 else
9192 adjust = size + PROBE_INTERVAL - i;
9193
9194 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9195 plus_constant (stack_pointer_rtx, -adjust)));
9196 emit_stack_probe (stack_pointer_rtx);
9197
9198 /* Adjust back to account for the additional first interval. */
9199 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9200 plus_constant (stack_pointer_rtx,
9201 PROBE_INTERVAL + dope)));
9202 }
9203
9204 /* Otherwise, do the same as above, but in a loop. Note that we must be
9205 extra careful with variables wrapping around because we might be at
9206 the very top (or the very bottom) of the address space and we have
9207 to be able to handle this case properly; in particular, we use an
9208 equality test for the loop condition. */
9209 else
9210 {
9211 HOST_WIDE_INT rounded_size;
9212 struct scratch_reg sr;
9213
9214 get_scratch_register_on_entry (&sr);
9215
9216
9217 /* Step 1: round SIZE to the previous multiple of the interval. */
9218
9219 rounded_size = size & -PROBE_INTERVAL;
9220
9221
9222 /* Step 2: compute initial and final value of the loop counter. */
9223
9224 /* SP = SP_0 + PROBE_INTERVAL. */
9225 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9226 plus_constant (stack_pointer_rtx,
9227 - (PROBE_INTERVAL + dope))));
9228
9229 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9230 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9231 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9232 gen_rtx_PLUS (Pmode, sr.reg,
9233 stack_pointer_rtx)));
9234
9235
9236 /* Step 3: the loop
9237
9238 while (SP != LAST_ADDR)
9239 {
9240 SP = SP + PROBE_INTERVAL
9241 probe at SP
9242 }
9243
9244 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9245 values of N from 1 until it is equal to ROUNDED_SIZE. */
9246
9247 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9248
9249
9250 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9251 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9252
9253 if (size != rounded_size)
9254 {
9255 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9256 plus_constant (stack_pointer_rtx,
9257 rounded_size - size)));
9258 emit_stack_probe (stack_pointer_rtx);
9259 }
9260
9261 /* Adjust back to account for the additional first interval. */
9262 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9263 plus_constant (stack_pointer_rtx,
9264 PROBE_INTERVAL + dope)));
9265
9266 release_scratch_register_on_entry (&sr);
9267 }
9268
9269 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9270 cfun->machine->fs.sp_offset += size;
9271
9272 /* Make sure nothing is scheduled before we are done. */
9273 emit_insn (gen_blockage ());
9274 }
9275
9276 /* Adjust the stack pointer up to REG while probing it. */
9277
9278 const char *
9279 output_adjust_stack_and_probe (rtx reg)
9280 {
9281 static int labelno = 0;
9282 char loop_lab[32], end_lab[32];
9283 rtx xops[2];
9284
9285 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9286 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9287
9288 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9289
9290 /* Jump to END_LAB if SP == LAST_ADDR. */
9291 xops[0] = stack_pointer_rtx;
9292 xops[1] = reg;
9293 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9294 fputs ("\tje\t", asm_out_file);
9295 assemble_name_raw (asm_out_file, end_lab);
9296 fputc ('\n', asm_out_file);
9297
9298 /* SP = SP + PROBE_INTERVAL. */
9299 xops[1] = GEN_INT (PROBE_INTERVAL);
9300 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9301
9302 /* Probe at SP. */
9303 xops[1] = const0_rtx;
9304 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9305
9306 fprintf (asm_out_file, "\tjmp\t");
9307 assemble_name_raw (asm_out_file, loop_lab);
9308 fputc ('\n', asm_out_file);
9309
9310 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9311
9312 return "";
9313 }
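/* On a 64-bit target, and assuming the default 4096-byte probe interval
   and R11 as the scratch register, the loop emitted above looks roughly
   like (label spelling is target dependent):

	.LPSRL0:
	cmpq	%r11, %rsp
	je	.LPSRE0
	subq	$4096, %rsp
	orq	$0, (%rsp)
	jmp	.LPSRL0
	.LPSRE0:  */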
9314
9315 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9316 inclusive. These are offsets from the current stack pointer. */
9317
9318 static void
9319 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9320 {
9321 /* See if we have a constant small number of probes to generate. If so,
9322 that's the easy case. The run-time loop is made up of 7 insns in the
9323 generic case while the compile-time loop is made up of n insns for n #
9324 of intervals. */
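  /* For instance (illustrative values, assuming a 4096-byte PROBE_INTERVAL):
     with FIRST == 4096 and SIZE == 10000 the code below probes at SP - 8192
     and SP - 12288, followed by a final probe at SP - 14096 (FIRST + SIZE),
     without moving the stack pointer.  */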
9325 if (size <= 7 * PROBE_INTERVAL)
9326 {
9327 HOST_WIDE_INT i;
9328
9329 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9330 it exceeds SIZE. If only one probe is needed, this will not
9331 generate any code. Then probe at FIRST + SIZE. */
9332 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9333 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9334
9335 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9336 }
9337
9338 /* Otherwise, do the same as above, but in a loop. Note that we must be
9339 extra careful with variables wrapping around because we might be at
9340 the very top (or the very bottom) of the address space and we have
9341 to be able to handle this case properly; in particular, we use an
9342 equality test for the loop condition. */
9343 else
9344 {
9345 HOST_WIDE_INT rounded_size, last;
9346 struct scratch_reg sr;
9347
9348 get_scratch_register_on_entry (&sr);
9349
9350
9351 /* Step 1: round SIZE to the previous multiple of the interval. */
9352
9353 rounded_size = size & -PROBE_INTERVAL;
9354
9355
9356 /* Step 2: compute initial and final value of the loop counter. */
9357
9358 /* TEST_OFFSET = FIRST. */
9359 emit_move_insn (sr.reg, GEN_INT (-first));
9360
9361 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9362 last = first + rounded_size;
9363
9364
9365 /* Step 3: the loop
9366
9367 while (TEST_ADDR != LAST_ADDR)
9368 {
9369 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9370 probe at TEST_ADDR
9371 }
9372
9373 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9374 until it is equal to ROUNDED_SIZE. */
9375
9376 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9377
9378
9379 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9380 that SIZE is equal to ROUNDED_SIZE. */
9381
9382 if (size != rounded_size)
9383 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9384 stack_pointer_rtx,
9385 sr.reg),
9386 rounded_size - size));
9387
9388 release_scratch_register_on_entry (&sr);
9389 }
9390
9391 /* Make sure nothing is scheduled before we are done. */
9392 emit_insn (gen_blockage ());
9393 }
9394
9395 /* Probe a range of stack addresses from REG to END, inclusive. These are
9396 offsets from the current stack pointer. */
9397
9398 const char *
9399 output_probe_stack_range (rtx reg, rtx end)
9400 {
9401 static int labelno = 0;
9402 char loop_lab[32], end_lab[32];
9403 rtx xops[3];
9404
9405 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9406 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9407
9408 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9409
9410 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9411 xops[0] = reg;
9412 xops[1] = end;
9413 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9414 fputs ("\tje\t", asm_out_file);
9415 assemble_name_raw (asm_out_file, end_lab);
9416 fputc ('\n', asm_out_file);
9417
9418 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9419 xops[1] = GEN_INT (PROBE_INTERVAL);
9420 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9421
9422 /* Probe at TEST_ADDR. */
9423 xops[0] = stack_pointer_rtx;
9424 xops[1] = reg;
9425 xops[2] = const0_rtx;
9426 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9427
9428 fprintf (asm_out_file, "\tjmp\t");
9429 assemble_name_raw (asm_out_file, loop_lab);
9430 fputc ('\n', asm_out_file);
9431
9432 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9433
9434 return "";
9435 }
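/* For a 32-bit target the loop above expands to something like the
   following (register choice, label spelling and the form of the LAST_ADDR
   operand are illustrative):

	.LPSRL1:
	cmpl	%edx, %ecx
	je	.LPSRE1
	subl	$4096, %ecx
	orl	$0, (%esp,%ecx)
	jmp	.LPSRL1
	.LPSRE1:  */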
9436
9437 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
9438 to be generated in correct form. */
9439 static void
9440 ix86_finalize_stack_realign_flags (void)
9441 {
9442   /* Check if stack realignment is really needed after reload, and
9443      store the result in cfun.  */
9444 unsigned int incoming_stack_boundary
9445 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9446 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9447 unsigned int stack_realign = (incoming_stack_boundary
9448 < (current_function_is_leaf
9449 ? crtl->max_used_stack_slot_alignment
9450 : crtl->stack_alignment_needed));
9451
9452 if (crtl->stack_realign_finalized)
9453 {
9454       /* After stack_realign_needed is finalized, we can no longer
9455 	 change it.  */
9456 gcc_assert (crtl->stack_realign_needed == stack_realign);
9457 }
9458 else
9459 {
9460 crtl->stack_realign_needed = stack_realign;
9461 crtl->stack_realign_finalized = true;
9462 }
9463 }
9464
9465 /* Expand the prologue into a bunch of separate insns. */
9466
9467 void
9468 ix86_expand_prologue (void)
9469 {
9470 struct machine_function *m = cfun->machine;
9471 rtx insn, t;
9472 bool pic_reg_used;
9473 struct ix86_frame frame;
9474 HOST_WIDE_INT allocate;
9475 bool int_registers_saved;
9476
9477 ix86_finalize_stack_realign_flags ();
9478
9479 /* DRAP should not coexist with stack_realign_fp */
9480 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9481
9482 memset (&m->fs, 0, sizeof (m->fs));
9483
9484 /* Initialize CFA state for before the prologue. */
9485 m->fs.cfa_reg = stack_pointer_rtx;
9486 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9487
9488 /* Track SP offset to the CFA. We continue tracking this after we've
9489 swapped the CFA register away from SP. In the case of re-alignment
9490      this is fudged; we're interested in offsets within the local frame.  */
9491 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9492 m->fs.sp_valid = true;
9493
9494 ix86_compute_frame_layout (&frame);
9495
9496 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9497 {
9498 /* We should have already generated an error for any use of
9499 ms_hook on a nested function. */
9500 gcc_checking_assert (!ix86_static_chain_on_stack);
9501
9502       /* Check if profiling is active and we shall use the profiling-before-prologue
9503 	 variant.  If so, issue a sorry.  */
9504 if (crtl->profile && flag_fentry != 0)
9505 sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
9506
9507 /* In ix86_asm_output_function_label we emitted:
9508 8b ff movl.s %edi,%edi
9509 55 push %ebp
9510 8b ec movl.s %esp,%ebp
9511
9512 This matches the hookable function prologue in Win32 API
9513 functions in Microsoft Windows XP Service Pack 2 and newer.
9514 Wine uses this to enable Windows apps to hook the Win32 API
9515 functions provided by Wine.
9516
9517 What that means is that we've already set up the frame pointer. */
9518
9519 if (frame_pointer_needed
9520 && !(crtl->drap_reg && crtl->stack_realign_needed))
9521 {
9522 rtx push, mov;
9523
9524 /* We've decided to use the frame pointer already set up.
9525 Describe this to the unwinder by pretending that both
9526 push and mov insns happen right here.
9527
9528 Putting the unwind info here at the end of the ms_hook
9529 is done so that we can make absolutely certain we get
9530 the required byte sequence at the start of the function,
9531 rather than relying on an assembler that can produce
9532 the exact encoding required.
9533
9534 However it does mean (in the unpatched case) that we have
9535 a 1 insn window where the asynchronous unwind info is
9536 incorrect. However, if we placed the unwind info at
9537 its correct location we would have incorrect unwind info
9538 in the patched case. Which is probably all moot since
9539 I don't expect Wine generates dwarf2 unwind info for the
9540 system libraries that use this feature. */
9541
9542 insn = emit_insn (gen_blockage ());
9543
9544 push = gen_push (hard_frame_pointer_rtx);
9545 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9546 stack_pointer_rtx);
9547 RTX_FRAME_RELATED_P (push) = 1;
9548 RTX_FRAME_RELATED_P (mov) = 1;
9549
9550 RTX_FRAME_RELATED_P (insn) = 1;
9551 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9552 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9553
9554 /* Note that gen_push incremented m->fs.cfa_offset, even
9555 though we didn't emit the push insn here. */
9556 m->fs.cfa_reg = hard_frame_pointer_rtx;
9557 m->fs.fp_offset = m->fs.cfa_offset;
9558 m->fs.fp_valid = true;
9559 }
9560 else
9561 {
9562 /* The frame pointer is not needed so pop %ebp again.
9563 This leaves us with a pristine state. */
9564 emit_insn (gen_pop (hard_frame_pointer_rtx));
9565 }
9566 }
9567
9568 /* The first insn of a function that accepts its static chain on the
9569 stack is to push the register that would be filled in by a direct
9570 call. This insn will be skipped by the trampoline. */
9571 else if (ix86_static_chain_on_stack)
9572 {
9573 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9574 emit_insn (gen_blockage ());
9575
9576 /* We don't want to interpret this push insn as a register save,
9577 only as a stack adjustment. The real copy of the register as
9578 a save will be done later, if needed. */
9579 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9580 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9581 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9582 RTX_FRAME_RELATED_P (insn) = 1;
9583 }
9584
9585   /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9586      DRAP is needed and stack realignment is really needed after reload.  */
9587 if (stack_realign_drap)
9588 {
9589 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9590
9591       /* Only need to push the parameter pointer reg if it is callee-saved.  */
9592 if (!call_used_regs[REGNO (crtl->drap_reg)])
9593 {
9594 /* Push arg pointer reg */
9595 insn = emit_insn (gen_push (crtl->drap_reg));
9596 RTX_FRAME_RELATED_P (insn) = 1;
9597 }
9598
9599 /* Grab the argument pointer. */
9600 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9601 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9602 RTX_FRAME_RELATED_P (insn) = 1;
9603 m->fs.cfa_reg = crtl->drap_reg;
9604 m->fs.cfa_offset = 0;
9605
9606 /* Align the stack. */
9607 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9608 stack_pointer_rtx,
9609 GEN_INT (-align_bytes)));
9610 RTX_FRAME_RELATED_P (insn) = 1;
9611
9612       /* Replicate the return address on the stack so that the return
9613 	 address can be reached via the (argp - 1) slot.  This is needed
9614 	 to implement the macro RETURN_ADDR_RTX and the intrinsic function
9615 	 expand_builtin_return_addr etc.  */
9616 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9617 t = gen_frame_mem (Pmode, t);
9618 insn = emit_insn (gen_push (t));
9619 RTX_FRAME_RELATED_P (insn) = 1;
9620
9621 /* For the purposes of frame and register save area addressing,
9622 we've started over with a new frame. */
9623 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9624 m->fs.realigned = true;
9625 }
9626
9627 if (frame_pointer_needed && !m->fs.fp_valid)
9628 {
9629 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9630 slower on all targets. Also sdb doesn't like it. */
9631 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9632 RTX_FRAME_RELATED_P (insn) = 1;
9633
9634 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9635 RTX_FRAME_RELATED_P (insn) = 1;
9636
9637 if (m->fs.cfa_reg == stack_pointer_rtx)
9638 m->fs.cfa_reg = hard_frame_pointer_rtx;
9639 gcc_assert (m->fs.sp_offset == frame.hard_frame_pointer_offset);
9640 m->fs.fp_offset = m->fs.sp_offset;
9641 m->fs.fp_valid = true;
9642 }
9643
9644 int_registers_saved = (frame.nregs == 0);
9645
9646 if (!int_registers_saved)
9647 {
9648 /* If saving registers via PUSH, do so now. */
9649 if (!frame.save_regs_using_mov)
9650 {
9651 ix86_emit_save_regs ();
9652 int_registers_saved = true;
9653 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9654 }
9655
9656       /* When using the red zone we may start register saving before allocating
9657 	 the stack frame, saving one cycle of the prologue.  However, avoid
9658 doing this if we have to probe the stack; at least on x86_64 the
9659 stack probe can turn into a call that clobbers a red zone location. */
9660 else if (ix86_using_red_zone ()
9661 && (! TARGET_STACK_PROBE
9662 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9663 {
9664 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9665 int_registers_saved = true;
9666 }
9667 }
9668
9669 if (stack_realign_fp)
9670 {
9671 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9672 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9673
9674 /* The computation of the size of the re-aligned stack frame means
9675 that we must allocate the size of the register save area before
9676 performing the actual alignment. Otherwise we cannot guarantee
9677 that there's enough storage above the realignment point. */
9678 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9679 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9680 GEN_INT (m->fs.sp_offset
9681 - frame.sse_reg_save_offset),
9682 -1, false);
9683
9684 /* Align the stack. */
9685 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9686 stack_pointer_rtx,
9687 GEN_INT (-align_bytes)));
9688
9689 /* For the purposes of register save area addressing, the stack
9690 pointer is no longer valid. As for the value of sp_offset,
9691 see ix86_compute_frame_layout, which we need to match in order
9692 to pass verification of stack_pointer_offset at the end. */
9693 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
9694 m->fs.sp_valid = false;
9695 }
9696
9697 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9698
9699 if (flag_stack_usage)
9700 {
9701 /* We start to count from ARG_POINTER. */
9702 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9703
9704 /* If it was realigned, take into account the fake frame. */
9705 if (stack_realign_drap)
9706 {
9707 if (ix86_static_chain_on_stack)
9708 stack_size += UNITS_PER_WORD;
9709
9710 if (!call_used_regs[REGNO (crtl->drap_reg)])
9711 stack_size += UNITS_PER_WORD;
9712
9713 /* This over-estimates by 1 minimal-stack-alignment-unit but
9714 mitigates that by counting in the new return address slot. */
9715 current_function_dynamic_stack_size
9716 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9717 }
9718
9719 current_function_static_stack_size = stack_size;
9720 }
9721
9722 /* The stack has already been decremented by the instruction calling us
9723 so we need to probe unconditionally to preserve the protection area. */
9724 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9725 {
9726 /* We expect the registers to be saved when probes are used. */
9727 gcc_assert (int_registers_saved);
9728
9729 if (STACK_CHECK_MOVING_SP)
9730 {
9731 ix86_adjust_stack_and_probe (allocate);
9732 allocate = 0;
9733 }
9734 else
9735 {
9736 HOST_WIDE_INT size = allocate;
9737
9738 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9739 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9740
9741 if (TARGET_STACK_PROBE)
9742 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9743 else
9744 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9745 }
9746 }
9747
9748 if (allocate == 0)
9749 ;
9750 else if (!ix86_target_stack_probe ()
9751 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9752 {
9753 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9754 GEN_INT (-allocate), -1,
9755 m->fs.cfa_reg == stack_pointer_rtx);
9756 }
9757 else
9758 {
9759 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9760 rtx r10 = NULL;
9761 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
9762
9763 bool eax_live = false;
9764 bool r10_live = false;
9765
9766 if (TARGET_64BIT)
9767 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
9768 if (!TARGET_64BIT_MS_ABI)
9769 eax_live = ix86_eax_live_at_start_p ();
9770
9771 if (eax_live)
9772 {
9773 emit_insn (gen_push (eax));
9774 allocate -= UNITS_PER_WORD;
9775 }
9776 if (r10_live)
9777 {
9778 r10 = gen_rtx_REG (Pmode, R10_REG);
9779 emit_insn (gen_push (r10));
9780 allocate -= UNITS_PER_WORD;
9781 }
9782
9783 emit_move_insn (eax, GEN_INT (allocate));
9784 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9785
9786 /* Use the fact that AX still contains ALLOCATE. */
9787 adjust_stack_insn = (TARGET_64BIT
9788 ? gen_pro_epilogue_adjust_stack_di_sub
9789 : gen_pro_epilogue_adjust_stack_si_sub);
9790
9791 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
9792 stack_pointer_rtx, eax));
9793
9794 if (m->fs.cfa_reg == stack_pointer_rtx)
9795 {
9796 m->fs.cfa_offset += allocate;
9797
9798 RTX_FRAME_RELATED_P (insn) = 1;
9799 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9800 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9801 plus_constant (stack_pointer_rtx,
9802 -allocate)));
9803 }
9804 m->fs.sp_offset += allocate;
9805
9806 if (r10_live && eax_live)
9807 {
9808 t = choose_baseaddr (m->fs.sp_offset - allocate);
9809 emit_move_insn (r10, gen_frame_mem (Pmode, t));
9810 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
9811 emit_move_insn (eax, gen_frame_mem (Pmode, t));
9812 }
9813 else if (eax_live || r10_live)
9814 {
9815 t = choose_baseaddr (m->fs.sp_offset - allocate);
9816 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
9817 }
9818 }
9819 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9820
9821 if (!int_registers_saved)
9822 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9823 if (frame.nsseregs)
9824 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9825
9826 pic_reg_used = false;
9827 if (pic_offset_table_rtx
9828 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9829 || crtl->profile))
9830 {
9831 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9832
9833 if (alt_pic_reg_used != INVALID_REGNUM)
9834 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9835
9836 pic_reg_used = true;
9837 }
9838
9839 if (pic_reg_used)
9840 {
9841 if (TARGET_64BIT)
9842 {
9843 if (ix86_cmodel == CM_LARGE_PIC)
9844 {
9845 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9846 rtx label = gen_label_rtx ();
9847 emit_label (label);
9848 LABEL_PRESERVE_P (label) = 1;
9849 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9850 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9851 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9852 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9853 pic_offset_table_rtx, tmp_reg));
9854 }
9855 else
9856 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9857 }
9858 else
9859 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
9860 }
9861
9862 /* In the pic_reg_used case, make sure that the got load isn't deleted
9863 when mcount needs it. Blockage to avoid call movement across mcount
9864 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
9865 note. */
9866 if (crtl->profile && !flag_fentry && pic_reg_used)
9867 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9868
9869 if (crtl->drap_reg && !crtl->stack_realign_needed)
9870 {
9871       /* vDRAP is set up, but after reload it turns out stack realignment
9872 	 isn't necessary; here we emit prologue code to set up DRAP
9873 	 without the stack realignment adjustment.  */
9874 t = choose_baseaddr (0);
9875 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9876 }
9877
9878   /* Prevent instructions from being scheduled into the register save push
9879      sequence when access to the red-zone area is done through the frame pointer.
9880      The offset between the frame pointer and the stack pointer is calculated
9881      relative to the value of the stack pointer at the end of the function
9882      prologue, and moving instructions that access the red-zone area via the frame
9883      pointer into the push sequence violates this assumption.  */
9884 if (frame_pointer_needed && frame.red_zone_size)
9885 emit_insn (gen_memory_blockage ());
9886
9887 /* Emit cld instruction if stringops are used in the function. */
9888 if (TARGET_CLD && ix86_current_function_needs_cld)
9889 emit_insn (gen_cld ());
9890 }
9891
9892 /* Emit code to restore REG using a POP insn. */
9893
9894 static void
9895 ix86_emit_restore_reg_using_pop (rtx reg)
9896 {
9897 struct machine_function *m = cfun->machine;
9898 rtx insn = emit_insn (gen_pop (reg));
9899
9900 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9901 m->fs.sp_offset -= UNITS_PER_WORD;
9902
9903 if (m->fs.cfa_reg == crtl->drap_reg
9904 && REGNO (reg) == REGNO (crtl->drap_reg))
9905 {
9906 /* Previously we'd represented the CFA as an expression
9907 like *(%ebp - 8). We've just popped that value from
9908 the stack, which means we need to reset the CFA to
9909 the drap register. This will remain until we restore
9910 the stack pointer. */
9911 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9912 RTX_FRAME_RELATED_P (insn) = 1;
9913
9914 /* This means that the DRAP register is valid for addressing too. */
9915 m->fs.drap_valid = true;
9916 return;
9917 }
9918
9919 if (m->fs.cfa_reg == stack_pointer_rtx)
9920 {
9921 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9922 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9923 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9924 RTX_FRAME_RELATED_P (insn) = 1;
9925
9926 m->fs.cfa_offset -= UNITS_PER_WORD;
9927 }
9928
9929 /* When the frame pointer is the CFA, and we pop it, we are
9930 swapping back to the stack pointer as the CFA. This happens
9931 for stack frames that don't allocate other data, so we assume
9932 the stack pointer is now pointing at the return address, i.e.
9933      the function entry state, which makes the offset one word.  */
9934 if (reg == hard_frame_pointer_rtx)
9935 {
9936 m->fs.fp_valid = false;
9937 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9938 {
9939 m->fs.cfa_reg = stack_pointer_rtx;
9940 m->fs.cfa_offset -= UNITS_PER_WORD;
9941
9942 add_reg_note (insn, REG_CFA_DEF_CFA,
9943 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9944 GEN_INT (m->fs.cfa_offset)));
9945 RTX_FRAME_RELATED_P (insn) = 1;
9946 }
9947 }
9948 }
9949
9950 /* Emit code to restore saved registers using POP insns. */
9951
9952 static void
9953 ix86_emit_restore_regs_using_pop (void)
9954 {
9955 unsigned int regno;
9956
9957 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9958 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9959 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
9960 }
9961
9962 /* Emit code and notes for the LEAVE instruction. */
9963
9964 static void
9965 ix86_emit_leave (void)
9966 {
9967 struct machine_function *m = cfun->machine;
9968 rtx insn = emit_insn (ix86_gen_leave ());
9969
9970 ix86_add_queued_cfa_restore_notes (insn);
9971
9972 gcc_assert (m->fs.fp_valid);
9973 m->fs.sp_valid = true;
9974 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9975 m->fs.fp_valid = false;
9976
9977 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9978 {
9979 m->fs.cfa_reg = stack_pointer_rtx;
9980 m->fs.cfa_offset = m->fs.sp_offset;
9981
9982 add_reg_note (insn, REG_CFA_DEF_CFA,
9983 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
9984 RTX_FRAME_RELATED_P (insn) = 1;
9985 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9986 m->fs.fp_offset);
9987 }
9988 }
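/* Note that LEAVE is equivalent to "mov %ebp, %esp; pop %ebp", which is
   why sp_offset above becomes fp_offset minus one word: after the pop the
   stack pointer sits one word above the slot the frame pointer addressed,
   and the frame pointer itself is no longer a valid base register.  */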
9989
9990 /* Emit code to restore saved registers using MOV insns.
9991 First register is restored from CFA - CFA_OFFSET. */
9992 static void
9993 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9994 int maybe_eh_return)
9995 {
9996 struct machine_function *m = cfun->machine;
9997 unsigned int regno;
9998
9999 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10000 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10001 {
10002 rtx reg = gen_rtx_REG (Pmode, regno);
10003 rtx insn, mem;
10004
10005 mem = choose_baseaddr (cfa_offset);
10006 mem = gen_frame_mem (Pmode, mem);
10007 insn = emit_move_insn (reg, mem);
10008
10009 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10010 {
10011 /* Previously we'd represented the CFA as an expression
10012 like *(%ebp - 8). We've just popped that value from
10013 the stack, which means we need to reset the CFA to
10014 the drap register. This will remain until we restore
10015 the stack pointer. */
10016 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10017 RTX_FRAME_RELATED_P (insn) = 1;
10018
10019 /* This means that the DRAP register is valid for addressing. */
10020 m->fs.drap_valid = true;
10021 }
10022 else
10023 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10024
10025 cfa_offset -= UNITS_PER_WORD;
10026 }
10027 }
10028
10029 /* Emit code to restore saved registers using MOV insns.
10030 First register is restored from CFA - CFA_OFFSET. */
10031 static void
10032 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10033 int maybe_eh_return)
10034 {
10035 unsigned int regno;
10036
10037 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10038 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10039 {
10040 rtx reg = gen_rtx_REG (V4SFmode, regno);
10041 rtx mem;
10042
10043 mem = choose_baseaddr (cfa_offset);
10044 mem = gen_rtx_MEM (V4SFmode, mem);
10045 set_mem_align (mem, 128);
10046 emit_move_insn (reg, mem);
10047
10048 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10049
10050 cfa_offset -= 16;
10051 }
10052 }
10053
10054 /* Restore function stack, frame, and registers. */
10055
10056 void
10057 ix86_expand_epilogue (int style)
10058 {
10059 struct machine_function *m = cfun->machine;
10060 struct machine_frame_state frame_state_save = m->fs;
10061 struct ix86_frame frame;
10062 bool restore_regs_via_mov;
10063 bool using_drap;
10064
10065 ix86_finalize_stack_realign_flags ();
10066 ix86_compute_frame_layout (&frame);
10067
10068 m->fs.sp_valid = (!frame_pointer_needed
10069 || (current_function_sp_is_unchanging
10070 && !stack_realign_fp));
10071 gcc_assert (!m->fs.sp_valid
10072 || m->fs.sp_offset == frame.stack_pointer_offset);
10073
10074 /* The FP must be valid if the frame pointer is present. */
10075 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10076 gcc_assert (!m->fs.fp_valid
10077 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10078
10079 /* We must have *some* valid pointer to the stack frame. */
10080 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10081
10082 /* The DRAP is never valid at this point. */
10083 gcc_assert (!m->fs.drap_valid);
10084
10085 /* See the comment about red zone and frame
10086 pointer usage in ix86_expand_prologue. */
10087 if (frame_pointer_needed && frame.red_zone_size)
10088 emit_insn (gen_memory_blockage ());
10089
10090 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10091 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10092
10093 /* Determine the CFA offset of the end of the red-zone. */
10094 m->fs.red_zone_offset = 0;
10095 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10096 {
10097 /* The red-zone begins below the return address. */
10098 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10099
10100 /* When the register save area is in the aligned portion of
10101 the stack, determine the maximum runtime displacement that
10102 matches up with the aligned frame. */
10103 if (stack_realign_drap)
10104 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10105 + UNITS_PER_WORD);
10106 }
10107
10108 /* Special care must be taken for the normal return case of a function
10109 using eh_return: the eax and edx registers are marked as saved, but
10110 not restored along this path. Adjust the save location to match. */
10111 if (crtl->calls_eh_return && style != 2)
10112 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10113
10114   /* If we're only restoring one register and sp is not valid, then
10115      use a move instruction to restore the register, since it's
10116      less work than reloading sp and popping the register.  */
10117 if (!m->fs.sp_valid && frame.nregs <= 1)
10118 restore_regs_via_mov = true;
10119 /* EH_RETURN requires the use of moves to function properly. */
10120 else if (crtl->calls_eh_return)
10121 restore_regs_via_mov = true;
10122 else if (TARGET_EPILOGUE_USING_MOVE
10123 && cfun->machine->use_fast_prologue_epilogue
10124 && (frame.nregs > 1
10125 || m->fs.sp_offset != frame.reg_save_offset))
10126 restore_regs_via_mov = true;
10127 else if (frame_pointer_needed
10128 && !frame.nregs
10129 && m->fs.sp_offset != frame.reg_save_offset)
10130 restore_regs_via_mov = true;
10131 else if (frame_pointer_needed
10132 && TARGET_USE_LEAVE
10133 && cfun->machine->use_fast_prologue_epilogue
10134 && frame.nregs == 1)
10135 restore_regs_via_mov = true;
10136 else
10137 restore_regs_via_mov = false;
10138
10139 if (restore_regs_via_mov || frame.nsseregs)
10140 {
10141 /* Ensure that the entire register save area is addressable via
10142 the stack pointer, if we will restore via sp. */
10143 if (TARGET_64BIT
10144 && m->fs.sp_offset > 0x7fffffff
10145 && !(m->fs.fp_valid || m->fs.drap_valid)
10146 && (frame.nsseregs + frame.nregs) != 0)
10147 {
10148 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10149 GEN_INT (m->fs.sp_offset
10150 - frame.sse_reg_save_offset),
10151 style,
10152 m->fs.cfa_reg == stack_pointer_rtx);
10153 }
10154 }
10155
10156 /* If there are any SSE registers to restore, then we have to do it
10157 via moves, since there's obviously no pop for SSE regs. */
10158 if (frame.nsseregs)
10159 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10160 style == 2);
10161
10162 if (restore_regs_via_mov)
10163 {
10164 rtx t;
10165
10166 if (frame.nregs)
10167 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10168
10169 /* eh_return epilogues need %ecx added to the stack pointer. */
10170 if (style == 2)
10171 {
10172 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10173
10174 /* Stack align doesn't work with eh_return. */
10175 gcc_assert (!stack_realign_drap);
10176 	  /* Neither do regparm nested functions.  */
10177 gcc_assert (!ix86_static_chain_on_stack);
10178
10179 if (frame_pointer_needed)
10180 {
10181 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10182 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10183 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10184
10185 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10186 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10187
10188 /* Note that we use SA as a temporary CFA, as the return
10189 address is at the proper place relative to it. We
10190 pretend this happens at the FP restore insn because
10191 prior to this insn the FP would be stored at the wrong
10192 offset relative to SA, and after this insn we have no
10193 other reasonable register to use for the CFA. We don't
10194 bother resetting the CFA to the SP for the duration of
10195 the return insn. */
10196 add_reg_note (insn, REG_CFA_DEF_CFA,
10197 plus_constant (sa, UNITS_PER_WORD));
10198 ix86_add_queued_cfa_restore_notes (insn);
10199 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10200 RTX_FRAME_RELATED_P (insn) = 1;
10201
10202 m->fs.cfa_reg = sa;
10203 m->fs.cfa_offset = UNITS_PER_WORD;
10204 m->fs.fp_valid = false;
10205
10206 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10207 const0_rtx, style, false);
10208 }
10209 else
10210 {
10211 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10212 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10213 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10214 ix86_add_queued_cfa_restore_notes (insn);
10215
10216 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10217 if (m->fs.cfa_offset != UNITS_PER_WORD)
10218 {
10219 m->fs.cfa_offset = UNITS_PER_WORD;
10220 add_reg_note (insn, REG_CFA_DEF_CFA,
10221 plus_constant (stack_pointer_rtx,
10222 UNITS_PER_WORD));
10223 RTX_FRAME_RELATED_P (insn) = 1;
10224 }
10225 }
10226 m->fs.sp_offset = UNITS_PER_WORD;
10227 m->fs.sp_valid = true;
10228 }
10229 }
10230 else
10231 {
10232 /* First step is to deallocate the stack frame so that we can
10233 pop the registers. */
10234 if (!m->fs.sp_valid)
10235 {
10236 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10237 GEN_INT (m->fs.fp_offset
10238 - frame.reg_save_offset),
10239 style, false);
10240 }
10241 else if (m->fs.sp_offset != frame.reg_save_offset)
10242 {
10243 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10244 GEN_INT (m->fs.sp_offset
10245 - frame.reg_save_offset),
10246 style,
10247 m->fs.cfa_reg == stack_pointer_rtx);
10248 }
10249
10250 ix86_emit_restore_regs_using_pop ();
10251 }
10252
10253   /* If we used a frame pointer and haven't already got rid of it,
10254 then do so now. */
10255 if (m->fs.fp_valid)
10256 {
10257 /* If the stack pointer is valid and pointing at the frame
10258 pointer store address, then we only need a pop. */
10259 if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
10260 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10261 /* Leave results in shorter dependency chains on CPUs that are
10262 able to grok it fast. */
10263 else if (TARGET_USE_LEAVE
10264 || optimize_function_for_size_p (cfun)
10265 || !cfun->machine->use_fast_prologue_epilogue)
10266 ix86_emit_leave ();
10267 else
10268 {
10269 pro_epilogue_adjust_stack (stack_pointer_rtx,
10270 hard_frame_pointer_rtx,
10271 const0_rtx, style, !using_drap);
10272 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10273 }
10274 }
10275
10276 if (using_drap)
10277 {
10278 int param_ptr_offset = UNITS_PER_WORD;
10279 rtx insn;
10280
10281 gcc_assert (stack_realign_drap);
10282
10283 if (ix86_static_chain_on_stack)
10284 param_ptr_offset += UNITS_PER_WORD;
10285 if (!call_used_regs[REGNO (crtl->drap_reg)])
10286 param_ptr_offset += UNITS_PER_WORD;
10287
10288 insn = emit_insn (gen_rtx_SET
10289 (VOIDmode, stack_pointer_rtx,
10290 gen_rtx_PLUS (Pmode,
10291 crtl->drap_reg,
10292 GEN_INT (-param_ptr_offset))));
10293 m->fs.cfa_reg = stack_pointer_rtx;
10294 m->fs.cfa_offset = param_ptr_offset;
10295 m->fs.sp_offset = param_ptr_offset;
10296 m->fs.realigned = false;
10297
10298 add_reg_note (insn, REG_CFA_DEF_CFA,
10299 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10300 GEN_INT (param_ptr_offset)));
10301 RTX_FRAME_RELATED_P (insn) = 1;
10302
10303 if (!call_used_regs[REGNO (crtl->drap_reg)])
10304 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10305 }
10306
10307 /* At this point the stack pointer must be valid, and we must have
10308 restored all of the registers. We may not have deallocated the
10309 entire stack frame. We've delayed this until now because it may
10310 be possible to merge the local stack deallocation with the
10311 deallocation forced by ix86_static_chain_on_stack. */
10312 gcc_assert (m->fs.sp_valid);
10313 gcc_assert (!m->fs.fp_valid);
10314 gcc_assert (!m->fs.realigned);
10315 if (m->fs.sp_offset != UNITS_PER_WORD)
10316 {
10317 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10318 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10319 style, true);
10320 }
10321
10322 /* Sibcall epilogues don't want a return instruction. */
10323 if (style == 0)
10324 {
10325 m->fs = frame_state_save;
10326 return;
10327 }
10328
10329 if (crtl->args.pops_args && crtl->args.size)
10330 {
10331 rtx popc = GEN_INT (crtl->args.pops_args);
10332
10333 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10334 address, do explicit add, and jump indirectly to the caller. */
10335
10336 if (crtl->args.pops_args >= 65536)
10337 {
10338 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10339 rtx insn;
10340
10341 /* There is no "pascal" calling convention in any 64bit ABI. */
10342 gcc_assert (!TARGET_64BIT);
10343
10344 insn = emit_insn (gen_pop (ecx));
10345 m->fs.cfa_offset -= UNITS_PER_WORD;
10346 m->fs.sp_offset -= UNITS_PER_WORD;
10347
10348 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10349 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10350 add_reg_note (insn, REG_CFA_REGISTER,
10351 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10352 RTX_FRAME_RELATED_P (insn) = 1;
10353
10354 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10355 popc, -1, true);
10356 emit_jump_insn (gen_return_indirect_internal (ecx));
10357 }
10358 else
10359 emit_jump_insn (gen_return_pop_internal (popc));
10360 }
10361 else
10362 emit_jump_insn (gen_return_internal ());
10363
10364 /* Restore the state back to the state from the prologue,
10365 so that it's correct for the next epilogue. */
10366 m->fs = frame_state_save;
10367 }
10368
10369 /* Reset from the function's potential modifications. */
10370
10371 static void
10372 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10373 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10374 {
10375 if (pic_offset_table_rtx)
10376 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10377 #if TARGET_MACHO
10378 /* Mach-O doesn't support labels at the end of objects, so if
10379 it looks like we might want one, insert a NOP. */
10380 {
10381 rtx insn = get_last_insn ();
10382 while (insn
10383 && NOTE_P (insn)
10384 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10385 insn = PREV_INSN (insn);
10386 if (insn
10387 && (LABEL_P (insn)
10388 || (NOTE_P (insn)
10389 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10390 fputs ("\tnop\n", file);
10391 }
10392 #endif
10393
10394 }
10395
10396 /* Return a scratch register to use in the split stack prologue. The
10397 split stack prologue is used for -fsplit-stack. It consists of the first
10398 instructions in the function, emitted even before the regular prologue.
10399 The scratch register can be any caller-saved register which is not
10400 used for parameters or for the static chain. */
10401
10402 static unsigned int
10403 split_stack_prologue_scratch_regno (void)
10404 {
10405 if (TARGET_64BIT)
10406 return R11_REG;
10407 else
10408 {
10409 bool is_fastcall;
10410 int regparm;
10411
10412 is_fastcall = (lookup_attribute ("fastcall",
10413 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10414 != NULL);
10415 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10416
10417 if (is_fastcall)
10418 {
10419 if (DECL_STATIC_CHAIN (cfun->decl))
10420 {
10421 sorry ("-fsplit-stack does not support fastcall with "
10422 "nested function");
10423 return INVALID_REGNUM;
10424 }
10425 return AX_REG;
10426 }
10427 else if (regparm < 3)
10428 {
10429 if (!DECL_STATIC_CHAIN (cfun->decl))
10430 return CX_REG;
10431 else
10432 {
10433 if (regparm >= 2)
10434 {
10435 sorry ("-fsplit-stack does not support 2 register "
10436 " parameters for a nested function");
10437 return INVALID_REGNUM;
10438 }
10439 return DX_REG;
10440 }
10441 }
10442 else
10443 {
10444 /* FIXME: We could make this work by pushing a register
10445 around the addition and comparison. */
10446 sorry ("-fsplit-stack does not support 3 register parameters");
10447 return INVALID_REGNUM;
10448 }
10449 }
10450 }
10451
10452 /* A SYMBOL_REF for the function which allocates new stack space for
10453 -fsplit-stack. */
10454
10455 static GTY(()) rtx split_stack_fn;
10456
10457 /* Handle -fsplit-stack. These are the first instructions in the
10458 function, even before the regular prologue. */
10459
10460 void
10461 ix86_expand_split_stack_prologue (void)
10462 {
10463 struct ix86_frame frame;
10464 HOST_WIDE_INT allocate;
10465 int args_size;
10466 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10467 rtx scratch_reg = NULL_RTX;
10468 rtx varargs_label = NULL_RTX;
10469
10470 gcc_assert (flag_split_stack && reload_completed);
10471
10472 ix86_finalize_stack_realign_flags ();
10473 ix86_compute_frame_layout (&frame);
10474 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10475
10476 /* This is the label we will branch to if we have enough stack
10477 space. We expect the basic block reordering pass to reverse this
10478 branch if optimizing, so that we branch in the unlikely case. */
10479 label = gen_label_rtx ();
10480
10481 /* We need to compare the stack pointer minus the frame size with
10482 the stack boundary in the TCB. The stack boundary always gives
10483 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10484 can compare directly. Otherwise we need to do an addition. */
10485
10486 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10487 UNSPEC_STACK_CHECK);
10488 limit = gen_rtx_CONST (Pmode, limit);
10489 limit = gen_rtx_MEM (Pmode, limit);
10490 if (allocate < SPLIT_STACK_AVAILABLE)
10491 current = stack_pointer_rtx;
10492 else
10493 {
10494 unsigned int scratch_regno;
10495 rtx offset;
10496
10497 /* We need a scratch register to hold the stack pointer minus
10498 the required frame size. Since this is the very start of the
10499 function, the scratch register can be any caller-saved
10500 register which is not used for parameters. */
10501 offset = GEN_INT (- allocate);
10502 scratch_regno = split_stack_prologue_scratch_regno ();
10503 if (scratch_regno == INVALID_REGNUM)
10504 return;
10505 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10506 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10507 {
10508 /* We don't use ix86_gen_add3 in this case because it will
10509 want to split to lea, but when not optimizing the insn
10510 will not be split after this point. */
10511 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10512 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10513 offset)));
10514 }
10515 else
10516 {
10517 emit_move_insn (scratch_reg, offset);
10518 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10519 stack_pointer_rtx));
10520 }
10521 current = scratch_reg;
10522 }
10523
10524 ix86_expand_branch (GEU, current, limit, label);
10525 jump_insn = get_last_insn ();
10526 JUMP_LABEL (jump_insn) = label;
10527
10528 /* Mark the jump as very likely to be taken. */
10529 add_reg_note (jump_insn, REG_BR_PROB,
10530 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
10531
10532 /* Get more stack space. We pass in the desired stack space and the
10533 size of the arguments to copy to the new stack. In 32-bit mode
10534 we push the parameters; __morestack will return on a new stack
10535 anyhow. In 64-bit mode we pass the parameters in r10 and
10536 r11. */
10537 allocate_rtx = GEN_INT (allocate);
10538 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10539 call_fusage = NULL_RTX;
10540 if (TARGET_64BIT)
10541 {
10542 rtx reg;
10543
10544 reg = gen_rtx_REG (Pmode, R10_REG);
10545
10546 /* If this function uses a static chain, it will be in %r10.
10547 Preserve it across the call to __morestack. */
10548 if (DECL_STATIC_CHAIN (cfun->decl))
10549 {
10550 rtx rax;
10551
10552 rax = gen_rtx_REG (Pmode, AX_REG);
10553 emit_move_insn (rax, reg);
10554 use_reg (&call_fusage, rax);
10555 }
10556
10557 emit_move_insn (reg, allocate_rtx);
10558 use_reg (&call_fusage, reg);
10559 reg = gen_rtx_REG (Pmode, R11_REG);
10560 emit_move_insn (reg, GEN_INT (args_size));
10561 use_reg (&call_fusage, reg);
10562 }
10563 else
10564 {
10565 emit_insn (gen_push (GEN_INT (args_size)));
10566 emit_insn (gen_push (allocate_rtx));
10567 }
10568 if (split_stack_fn == NULL_RTX)
10569 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10570 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, split_stack_fn),
10571 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10572 NULL_RTX, 0);
10573 add_function_usage_to (call_insn, call_fusage);
10574
10575 /* In order to make call/return prediction work right, we now need
10576 to execute a return instruction. See
10577 libgcc/config/i386/morestack.S for the details on how this works.
10578
10579 For flow purposes gcc must not see this as a return
10580 instruction--we need control flow to continue at the subsequent
10581 label. Therefore, we use an unspec. */
10582 gcc_assert (crtl->args.pops_args < 65536);
10583 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
10584
10585 /* If we are in 64-bit mode and this function uses a static chain,
10586 we saved %r10 in %rax before calling __morestack. */
10587 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10588 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
10589 gen_rtx_REG (Pmode, AX_REG));
10590
10591 /* If this function calls va_start, we need to store a pointer to
10592 the arguments on the old stack, because they may not all have been
10593 copied to the new stack. At this point the old stack can be
10594 found at the frame pointer value used by __morestack, because
10595 __morestack has set that up before calling back to us. Here we
10596 store that pointer in a scratch register, and in
10597 ix86_expand_prologue we store the scratch register in a stack
10598 slot. */
10599 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10600 {
10601 unsigned int scratch_regno;
10602 rtx frame_reg;
10603 int words;
10604
10605 scratch_regno = split_stack_prologue_scratch_regno ();
10606 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10607 frame_reg = gen_rtx_REG (Pmode, BP_REG);
10608
10609 /* 64-bit:
10610 fp -> old fp value
10611 return address within this function
10612 return address of caller of this function
10613 stack arguments
10614 So we add three words to get to the stack arguments.
10615
10616 32-bit:
10617 fp -> old fp value
10618 return address within this function
10619 first argument to __morestack
10620 second argument to __morestack
10621 return address of caller of this function
10622 stack arguments
10623 So we add five words to get to the stack arguments.
10624 */
10625 words = TARGET_64BIT ? 3 : 5;
10626 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10627 gen_rtx_PLUS (Pmode, frame_reg,
10628 GEN_INT (words * UNITS_PER_WORD))));
10629
10630 varargs_label = gen_label_rtx ();
10631 emit_jump_insn (gen_jump (varargs_label));
10632 JUMP_LABEL (get_last_insn ()) = varargs_label;
10633
10634 emit_barrier ();
10635 }
10636
10637 emit_label (label);
10638 LABEL_NUSES (label) = 1;
10639
10640 /* If this function calls va_start, we now have to set the scratch
10641 register for the case where we do not call __morestack. In this
10642 case we need to set it based on the stack pointer. */
10643 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10644 {
10645 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10646 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10647 GEN_INT (UNITS_PER_WORD))));
10648
10649 emit_label (varargs_label);
10650 LABEL_NUSES (varargs_label) = 1;
10651 }
10652 }
10653
10654 /* We may have to tell the dataflow pass that the split stack prologue
10655 is initializing a scratch register. */
10656
10657 static void
10658 ix86_live_on_entry (bitmap regs)
10659 {
10660 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10661 {
10662 gcc_assert (flag_split_stack);
10663 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
10664 }
10665 }
10666 \f
10667 /* Extract the parts of an RTL expression that is a valid memory address
10668 for an instruction. Return 0 if the structure of the address is
10669 grossly off. Return -1 if the address contains ASHIFT, so it is not
10670 strictly valid, but is still used for computing the length of the lea instruction. */
10671
10672 int
10673 ix86_decompose_address (rtx addr, struct ix86_address *out)
10674 {
10675 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10676 rtx base_reg, index_reg;
10677 HOST_WIDE_INT scale = 1;
10678 rtx scale_rtx = NULL_RTX;
10679 rtx tmp;
10680 int retval = 1;
10681 enum ix86_address_seg seg = SEG_DEFAULT;
10682
10683 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10684 base = addr;
10685 else if (GET_CODE (addr) == PLUS)
10686 {
10687 rtx addends[4], op;
10688 int n = 0, i;
10689
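/* Walk the chain of nested PLUS expressions, collecting at most four
   addends: a valid address has no more than a base, index*scale, a
   displacement and a segment override UNSPEC. */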
10690 op = addr;
10691 do
10692 {
10693 if (n >= 4)
10694 return 0;
10695 addends[n++] = XEXP (op, 1);
10696 op = XEXP (op, 0);
10697 }
10698 while (GET_CODE (op) == PLUS);
10699 if (n >= 4)
10700 return 0;
10701 addends[n] = op;
10702
10703 for (i = n; i >= 0; --i)
10704 {
10705 op = addends[i];
10706 switch (GET_CODE (op))
10707 {
10708 case MULT:
10709 if (index)
10710 return 0;
10711 index = XEXP (op, 0);
10712 scale_rtx = XEXP (op, 1);
10713 break;
10714
10715 case ASHIFT:
10716 if (index)
10717 return 0;
10718 index = XEXP (op, 0);
10719 tmp = XEXP (op, 1);
10720 if (!CONST_INT_P (tmp))
10721 return 0;
10722 scale = INTVAL (tmp);
10723 if ((unsigned HOST_WIDE_INT) scale > 3)
10724 return 0;
10725 scale = 1 << scale;
10726 break;
10727
10728 case UNSPEC:
10729 if (XINT (op, 1) == UNSPEC_TP
10730 && TARGET_TLS_DIRECT_SEG_REFS
10731 && seg == SEG_DEFAULT)
10732 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10733 else
10734 return 0;
10735 break;
10736
10737 case REG:
10738 case SUBREG:
10739 if (!base)
10740 base = op;
10741 else if (!index)
10742 index = op;
10743 else
10744 return 0;
10745 break;
10746
10747 case CONST:
10748 case CONST_INT:
10749 case SYMBOL_REF:
10750 case LABEL_REF:
10751 if (disp)
10752 return 0;
10753 disp = op;
10754 break;
10755
10756 default:
10757 return 0;
10758 }
10759 }
10760 }
10761 else if (GET_CODE (addr) == MULT)
10762 {
10763 index = XEXP (addr, 0); /* index*scale */
10764 scale_rtx = XEXP (addr, 1);
10765 }
10766 else if (GET_CODE (addr) == ASHIFT)
10767 {
10768 /* We're called for lea too, which implements ashift on occasion. */
10769 index = XEXP (addr, 0);
10770 tmp = XEXP (addr, 1);
10771 if (!CONST_INT_P (tmp))
10772 return 0;
10773 scale = INTVAL (tmp);
10774 if ((unsigned HOST_WIDE_INT) scale > 3)
10775 return 0;
10776 scale = 1 << scale;
10777 retval = -1;
10778 }
10779 else
10780 disp = addr; /* displacement */
10781
10782 /* Extract the integral value of scale. */
10783 if (scale_rtx)
10784 {
10785 if (!CONST_INT_P (scale_rtx))
10786 return 0;
10787 scale = INTVAL (scale_rtx);
10788 }
10789
10790 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10791 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10792
10793 /* Avoid useless 0 displacement. */
10794 if (disp == const0_rtx && (base || index))
10795 disp = NULL_RTX;
10796
10797 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
10798 if (base_reg && index_reg && scale == 1
10799 && (index_reg == arg_pointer_rtx
10800 || index_reg == frame_pointer_rtx
10801 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10802 {
10803 rtx tmp;
10804 tmp = base, base = index, index = tmp;
10805 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
10806 }
10807
10808 /* Special case: %ebp cannot be encoded as a base without a displacement.
10809 Similarly %r13. */
10810 if (!disp
10811 && base_reg
10812 && (base_reg == hard_frame_pointer_rtx
10813 || base_reg == frame_pointer_rtx
10814 || base_reg == arg_pointer_rtx
10815 || (REG_P (base_reg)
10816 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10817 || REGNO (base_reg) == R13_REG))))
10818 disp = const0_rtx;
10819
10820 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
10821 Avoid this by transforming to [%esi+0].
10822 Reload calls address legitimization without cfun defined, so we need
10823 to test cfun for being non-NULL. */
10824 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10825 && base_reg && !index_reg && !disp
10826 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
10827 disp = const0_rtx;
10828
10829 /* Special case: encode reg+reg instead of reg*2. */
10830 if (!base && index && scale == 2)
10831 base = index, base_reg = index_reg, scale = 1;
10832
10833 /* Special case: scaling cannot be encoded without base or displacement. */
10834 if (!base && !disp && index && scale != 1)
10835 disp = const0_rtx;
10836
10837 out->base = base;
10838 out->index = index;
10839 out->disp = disp;
10840 out->scale = scale;
10841 out->seg = seg;
10842
10843 return retval;
10844 }
10845 \f
10846 /* Return cost of the memory address x.
10847 For i386, it is better to use a complex address than let gcc copy
10848 the address into a reg and make a new pseudo. But not if the address
10849 requires two regs - that would mean more pseudos with longer
10850 lifetimes. */
10851 static int
10852 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10853 {
10854 struct ix86_address parts;
10855 int cost = 1;
10856 int ok = ix86_decompose_address (x, &parts);
10857
10858 gcc_assert (ok);
10859
10860 if (parts.base && GET_CODE (parts.base) == SUBREG)
10861 parts.base = SUBREG_REG (parts.base);
10862 if (parts.index && GET_CODE (parts.index) == SUBREG)
10863 parts.index = SUBREG_REG (parts.index);
10864
10865 /* Attempt to minimize number of registers in the address. */
10866 if ((parts.base
10867 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10868 || (parts.index
10869 && (!REG_P (parts.index)
10870 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10871 cost++;
10872
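/* If both the base and the index still need registers of their own (and
   are distinct), the address ties up two registers; penalize it further. */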
10873 if (parts.base
10874 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10875 && parts.index
10876 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10877 && parts.base != parts.index)
10878 cost++;
10879
10880 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10881 since its predecode logic can't detect the length of such instructions
10882 and they degenerate to vector decoding. Increase the cost of such
10883 addresses here. The penalty is at least 2 cycles. It may be worthwhile
10884 to split such addresses or even refuse such addresses at all.
10885
10886 The following addressing modes are affected:
10887 [base+scale*index]
10888 [scale*index+disp]
10889 [base+index]
10890
10891 The first and last case may be avoidable by explicitly coding the zero
10892 into the memory address, but I don't have an AMD-K6 machine handy to
10893 check this theory. */
10894
10895 if (TARGET_K6
10896 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10897 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10898 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10899 cost += 10;
10900
10901 return cost;
10902 }
10903 \f
10904 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10905 this is used to form addresses to local data when -fPIC is in
10906 use. */
10907
10908 static bool
10909 darwin_local_data_pic (rtx disp)
10910 {
10911 return (GET_CODE (disp) == UNSPEC
10912 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10913 }
10914
10915 /* Determine if a given RTX is a valid constant. We already know this
10916 satisfies CONSTANT_P. */
10917
10918 bool
10919 legitimate_constant_p (rtx x)
10920 {
10921 switch (GET_CODE (x))
10922 {
10923 case CONST:
10924 x = XEXP (x, 0);
10925
10926 if (GET_CODE (x) == PLUS)
10927 {
10928 if (!CONST_INT_P (XEXP (x, 1)))
10929 return false;
10930 x = XEXP (x, 0);
10931 }
10932
10933 if (TARGET_MACHO && darwin_local_data_pic (x))
10934 return true;
10935
10936 /* Only some unspecs are valid as "constants". */
10937 if (GET_CODE (x) == UNSPEC)
10938 switch (XINT (x, 1))
10939 {
10940 case UNSPEC_GOT:
10941 case UNSPEC_GOTOFF:
10942 case UNSPEC_PLTOFF:
10943 return TARGET_64BIT;
10944 case UNSPEC_TPOFF:
10945 case UNSPEC_NTPOFF:
10946 x = XVECEXP (x, 0, 0);
10947 return (GET_CODE (x) == SYMBOL_REF
10948 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10949 case UNSPEC_DTPOFF:
10950 x = XVECEXP (x, 0, 0);
10951 return (GET_CODE (x) == SYMBOL_REF
10952 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10953 default:
10954 return false;
10955 }
10956
10957 /* We must have drilled down to a symbol. */
10958 if (GET_CODE (x) == LABEL_REF)
10959 return true;
10960 if (GET_CODE (x) != SYMBOL_REF)
10961 return false;
10962 /* FALLTHRU */
10963
10964 case SYMBOL_REF:
10965 /* TLS symbols are never valid. */
10966 if (SYMBOL_REF_TLS_MODEL (x))
10967 return false;
10968
10969 /* DLLIMPORT symbols are never valid. */
10970 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10971 && SYMBOL_REF_DLLIMPORT_P (x))
10972 return false;
10973 break;
10974
10975 case CONST_DOUBLE:
10976 if (GET_MODE (x) == TImode
10977 && x != CONST0_RTX (TImode)
10978 && !TARGET_64BIT)
10979 return false;
10980 break;
10981
10982 case CONST_VECTOR:
10983 if (!standard_sse_constant_p (x))
10984 return false;
10985
10986 default:
10987 break;
10988 }
10989
10990 /* Otherwise we handle everything else in the move patterns. */
10991 return true;
10992 }
10993
10994 /* Determine if it's legal to put X into the constant pool. This
10995 is not possible for the address of thread-local symbols, which
10996 is checked above. */
10997
10998 static bool
10999 ix86_cannot_force_const_mem (rtx x)
11000 {
11001 /* We can always put integral constants and vectors in memory. */
11002 switch (GET_CODE (x))
11003 {
11004 case CONST_INT:
11005 case CONST_DOUBLE:
11006 case CONST_VECTOR:
11007 return false;
11008
11009 default:
11010 break;
11011 }
11012 return !legitimate_constant_p (x);
11013 }
11014
11015
11016 /* Nonzero if the constant value X is a legitimate general operand
11017 when generating PIC code. It is given that flag_pic is on and
11018 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11019
11020 bool
11021 legitimate_pic_operand_p (rtx x)
11022 {
11023 rtx inner;
11024
11025 switch (GET_CODE (x))
11026 {
11027 case CONST:
11028 inner = XEXP (x, 0);
11029 if (GET_CODE (inner) == PLUS
11030 && CONST_INT_P (XEXP (inner, 1)))
11031 inner = XEXP (inner, 0);
11032
11033 /* Only some unspecs are valid as "constants". */
11034 if (GET_CODE (inner) == UNSPEC)
11035 switch (XINT (inner, 1))
11036 {
11037 case UNSPEC_GOT:
11038 case UNSPEC_GOTOFF:
11039 case UNSPEC_PLTOFF:
11040 return TARGET_64BIT;
11041 case UNSPEC_TPOFF:
11042 x = XVECEXP (inner, 0, 0);
11043 return (GET_CODE (x) == SYMBOL_REF
11044 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11045 case UNSPEC_MACHOPIC_OFFSET:
11046 return legitimate_pic_address_disp_p (x);
11047 default:
11048 return false;
11049 }
11050 /* FALLTHRU */
11051
11052 case SYMBOL_REF:
11053 case LABEL_REF:
11054 return legitimate_pic_address_disp_p (x);
11055
11056 default:
11057 return true;
11058 }
11059 }
11060
11061 /* Determine if a given CONST RTX is a valid memory displacement
11062 in PIC mode. */
11063
11064 bool
11065 legitimate_pic_address_disp_p (rtx disp)
11066 {
11067 bool saw_plus;
11068
11069 /* In 64bit mode we can allow direct addresses of symbols and labels
11070 when they are not dynamic symbols. */
11071 if (TARGET_64BIT)
11072 {
11073 rtx op0 = disp, op1;
11074
11075 switch (GET_CODE (disp))
11076 {
11077 case LABEL_REF:
11078 return true;
11079
11080 case CONST:
11081 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11082 break;
11083 op0 = XEXP (XEXP (disp, 0), 0);
11084 op1 = XEXP (XEXP (disp, 0), 1);
11085 if (!CONST_INT_P (op1)
11086 || INTVAL (op1) >= 16*1024*1024
11087 || INTVAL (op1) < -16*1024*1024)
11088 break;
11089 if (GET_CODE (op0) == LABEL_REF)
11090 return true;
11091 if (GET_CODE (op0) != SYMBOL_REF)
11092 break;
11093 /* FALLTHRU */
11094
11095 case SYMBOL_REF:
11096 /* TLS references should always be enclosed in UNSPEC. */
11097 if (SYMBOL_REF_TLS_MODEL (op0))
11098 return false;
11099 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11100 && ix86_cmodel != CM_LARGE_PIC)
11101 return true;
11102 break;
11103
11104 default:
11105 break;
11106 }
11107 }
11108 if (GET_CODE (disp) != CONST)
11109 return false;
11110 disp = XEXP (disp, 0);
11111
11112 if (TARGET_64BIT)
11113 {
11114 /* It is unsafe to allow PLUS expressions here; this limits the allowed
11115 distance of GOT tables. We should not need these anyway. */
11116 if (GET_CODE (disp) != UNSPEC
11117 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11118 && XINT (disp, 1) != UNSPEC_GOTOFF
11119 && XINT (disp, 1) != UNSPEC_PLTOFF))
11120 return false;
11121
11122 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11123 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11124 return false;
11125 return true;
11126 }
11127
11128 saw_plus = false;
11129 if (GET_CODE (disp) == PLUS)
11130 {
11131 if (!CONST_INT_P (XEXP (disp, 1)))
11132 return false;
11133 disp = XEXP (disp, 0);
11134 saw_plus = true;
11135 }
11136
11137 if (TARGET_MACHO && darwin_local_data_pic (disp))
11138 return true;
11139
11140 if (GET_CODE (disp) != UNSPEC)
11141 return false;
11142
11143 switch (XINT (disp, 1))
11144 {
11145 case UNSPEC_GOT:
11146 if (saw_plus)
11147 return false;
11148 /* We need to check for both symbols and labels because VxWorks loads
11149 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11150 details. */
11151 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11152 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11153 case UNSPEC_GOTOFF:
11154 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11155 While the ABI also specifies a 32bit relocation, we don't produce it in
11156 the small PIC model at all. */
11157 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11158 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11159 && !TARGET_64BIT)
11160 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11161 return false;
11162 case UNSPEC_GOTTPOFF:
11163 case UNSPEC_GOTNTPOFF:
11164 case UNSPEC_INDNTPOFF:
11165 if (saw_plus)
11166 return false;
11167 disp = XVECEXP (disp, 0, 0);
11168 return (GET_CODE (disp) == SYMBOL_REF
11169 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11170 case UNSPEC_NTPOFF:
11171 disp = XVECEXP (disp, 0, 0);
11172 return (GET_CODE (disp) == SYMBOL_REF
11173 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11174 case UNSPEC_DTPOFF:
11175 disp = XVECEXP (disp, 0, 0);
11176 return (GET_CODE (disp) == SYMBOL_REF
11177 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11178 }
11179
11180 return false;
11181 }
11182
11183 /* Recognizes RTL expressions that are valid memory addresses for an
11184 instruction. The MODE argument is the machine mode for the MEM
11185 expression that wants to use this address.
11186
11187 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11188 convert common non-canonical forms to canonical form so that they will
11189 be recognized. */
11190
11191 static bool
11192 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11193 rtx addr, bool strict)
11194 {
11195 struct ix86_address parts;
11196 rtx base, index, disp;
11197 HOST_WIDE_INT scale;
11198
11199 if (ix86_decompose_address (addr, &parts) <= 0)
11200 /* Decomposition failed. */
11201 return false;
11202
11203 base = parts.base;
11204 index = parts.index;
11205 disp = parts.disp;
11206 scale = parts.scale;
11207
11208 /* Validate base register.
11209
11210 Don't allow SUBREGs that span more than a word here. They can lead to spill
11211 failures when the base is one word out of a two-word structure, which is
11212 represented internally as a DImode int. */
11213
11214 if (base)
11215 {
11216 rtx reg;
11217
11218 if (REG_P (base))
11219 reg = base;
11220 else if (GET_CODE (base) == SUBREG
11221 && REG_P (SUBREG_REG (base))
11222 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
11223 <= UNITS_PER_WORD)
11224 reg = SUBREG_REG (base);
11225 else
11226 /* Base is not a register. */
11227 return false;
11228
11229 if (GET_MODE (base) != Pmode)
11230 /* Base is not in Pmode. */
11231 return false;
11232
11233 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11234 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11235 /* Base is not valid. */
11236 return false;
11237 }
11238
11239 /* Validate index register.
11240
11241 Don't allow SUBREGs that span more than a word here -- same as above. */
11242
11243 if (index)
11244 {
11245 rtx reg;
11246
11247 if (REG_P (index))
11248 reg = index;
11249 else if (GET_CODE (index) == SUBREG
11250 && REG_P (SUBREG_REG (index))
11251 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
11252 <= UNITS_PER_WORD)
11253 reg = SUBREG_REG (index);
11254 else
11255 /* Index is not a register. */
11256 return false;
11257
11258 if (GET_MODE (index) != Pmode)
11259 /* Index is not in Pmode. */
11260 return false;
11261
11262 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11263 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11264 /* Index is not valid. */
11265 return false;
11266 }
11267
11268 /* Validate scale factor. */
11269 if (scale != 1)
11270 {
11271 if (!index)
11272 /* Scale without index. */
11273 return false;
11274
11275 if (scale != 2 && scale != 4 && scale != 8)
11276 /* Scale is not a valid multiplier. */
11277 return false;
11278 }
11279
11280 /* Validate displacement. */
11281 if (disp)
11282 {
11283 if (GET_CODE (disp) == CONST
11284 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11285 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11286 switch (XINT (XEXP (disp, 0), 1))
11287 {
11288 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
11289 used. While the ABI also specifies 32bit relocations, we don't produce
11290 them at all and use IP-relative addressing instead. */
11291 case UNSPEC_GOT:
11292 case UNSPEC_GOTOFF:
11293 gcc_assert (flag_pic);
11294 if (!TARGET_64BIT)
11295 goto is_legitimate_pic;
11296
11297 /* 64bit address unspec. */
11298 return false;
11299
11300 case UNSPEC_GOTPCREL:
11301 gcc_assert (flag_pic);
11302 goto is_legitimate_pic;
11303
11304 case UNSPEC_GOTTPOFF:
11305 case UNSPEC_GOTNTPOFF:
11306 case UNSPEC_INDNTPOFF:
11307 case UNSPEC_NTPOFF:
11308 case UNSPEC_DTPOFF:
11309 break;
11310
11311 case UNSPEC_STACK_CHECK:
11312 gcc_assert (flag_split_stack);
11313 break;
11314
11315 default:
11316 /* Invalid address unspec. */
11317 return false;
11318 }
11319
11320 else if (SYMBOLIC_CONST (disp)
11321 && (flag_pic
11322 || (TARGET_MACHO
11323 #if TARGET_MACHO
11324 && MACHOPIC_INDIRECT
11325 && !machopic_operand_p (disp)
11326 #endif
11327 )))
11328 {
11329
11330 is_legitimate_pic:
11331 if (TARGET_64BIT && (index || base))
11332 {
11333 /* foo@dtpoff(%rX) is ok. */
11334 if (GET_CODE (disp) != CONST
11335 || GET_CODE (XEXP (disp, 0)) != PLUS
11336 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11337 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11338 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11339 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11340 /* Non-constant pic memory reference. */
11341 return false;
11342 }
11343 else if (! legitimate_pic_address_disp_p (disp))
11344 /* Displacement is an invalid pic construct. */
11345 return false;
11346
11347 /* This code used to verify that a symbolic pic displacement
11348 includes the pic_offset_table_rtx register.
11349
11350 While this is a good idea, unfortunately these constructs may
11351 be created by the "adds using lea" optimization for incorrect
11352 code like:
11353
11354 int a;
11355 int foo(int i)
11356 {
11357 return *(&a+i);
11358 }
11359
11360 This code is nonsensical, but results in addressing the
11361 GOT table with a pic_offset_table_rtx base. We can't
11362 just refuse it easily, since it gets matched by the
11363 "addsi3" pattern, which later gets split to lea when the
11364 output register differs from the input. While this
11365 could be handled by a separate addsi pattern for this case
11366 that never results in lea, disabling this test seems to be
11367 the easier and correct fix for the crash. */
11368 }
11369 else if (GET_CODE (disp) != LABEL_REF
11370 && !CONST_INT_P (disp)
11371 && (GET_CODE (disp) != CONST
11372 || !legitimate_constant_p (disp))
11373 && (GET_CODE (disp) != SYMBOL_REF
11374 || !legitimate_constant_p (disp)))
11375 /* Displacement is not constant. */
11376 return false;
11377 else if (TARGET_64BIT
11378 && !x86_64_immediate_operand (disp, VOIDmode))
11379 /* Displacement is out of range. */
11380 return false;
11381 }
11382
11383 /* Everything looks valid. */
11384 return true;
11385 }
11386
11387 /* Determine if a given RTX is a valid constant address. */
11388
11389 bool
11390 constant_address_p (rtx x)
11391 {
11392 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11393 }
11394 \f
11395 /* Return a unique alias set for the GOT. */
11396
11397 static alias_set_type
11398 ix86_GOT_alias_set (void)
11399 {
11400 static alias_set_type set = -1;
11401 if (set == -1)
11402 set = new_alias_set ();
11403 return set;
11404 }
11405
11406 /* Return a legitimate reference for ORIG (an address) using the
11407 register REG. If REG is 0, a new pseudo is generated.
11408
11409 There are two types of references that must be handled:
11410
11411 1. Global data references must load the address from the GOT, via
11412 the PIC reg. An insn is emitted to do this load, and the reg is
11413 returned.
11414
11415 2. Static data references, constant pool addresses, and code labels
11416 compute the address as an offset from the GOT, whose base is in
11417 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11418 differentiate them from global data objects. The returned
11419 address is the PIC reg + an unspec constant.
11420
11421 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11422 reg also appears in the address. */
11423
11424 static rtx
11425 legitimize_pic_address (rtx orig, rtx reg)
11426 {
11427 rtx addr = orig;
11428 rtx new_rtx = orig;
11429 rtx base;
11430
11431 #if TARGET_MACHO
11432 if (TARGET_MACHO && !TARGET_64BIT)
11433 {
11434 if (reg == 0)
11435 reg = gen_reg_rtx (Pmode);
11436 /* Use the generic Mach-O PIC machinery. */
11437 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11438 }
11439 #endif
11440
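/* Addresses that are already legitimate PIC displacements in 64-bit mode
   need no further transformation. */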
11441 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11442 new_rtx = addr;
11443 else if (TARGET_64BIT
11444 && ix86_cmodel != CM_SMALL_PIC
11445 && gotoff_operand (addr, Pmode))
11446 {
11447 rtx tmpreg;
11448 /* This symbol may be referenced via a displacement from the PIC
11449 base address (@GOTOFF). */
11450
11451 if (reload_in_progress)
11452 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11453 if (GET_CODE (addr) == CONST)
11454 addr = XEXP (addr, 0);
11455 if (GET_CODE (addr) == PLUS)
11456 {
11457 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11458 UNSPEC_GOTOFF);
11459 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11460 }
11461 else
11462 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11463 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11464 if (!reg)
11465 tmpreg = gen_reg_rtx (Pmode);
11466 else
11467 tmpreg = reg;
11468 emit_move_insn (tmpreg, new_rtx);
11469
11470 if (reg != 0)
11471 {
11472 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11473 tmpreg, 1, OPTAB_DIRECT);
11474 new_rtx = reg;
11475 }
11476 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11477 }
11478 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11479 {
11480 /* This symbol may be referenced via a displacement from the PIC
11481 base address (@GOTOFF). */
11482
11483 if (reload_in_progress)
11484 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11485 if (GET_CODE (addr) == CONST)
11486 addr = XEXP (addr, 0);
11487 if (GET_CODE (addr) == PLUS)
11488 {
11489 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11490 UNSPEC_GOTOFF);
11491 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11492 }
11493 else
11494 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11495 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11496 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11497
11498 if (reg != 0)
11499 {
11500 emit_move_insn (reg, new_rtx);
11501 new_rtx = reg;
11502 }
11503 }
11504 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11505 /* We can't use @GOTOFF for text labels on VxWorks;
11506 see gotoff_operand. */
11507 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11508 {
11509 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11510 {
11511 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11512 return legitimize_dllimport_symbol (addr, true);
11513 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11514 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11515 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11516 {
11517 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11518 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11519 }
11520 }
11521
11522 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11523 {
11524 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11525 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11526 new_rtx = gen_const_mem (Pmode, new_rtx);
11527 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11528
11529 if (reg == 0)
11530 reg = gen_reg_rtx (Pmode);
11531 /* Use gen_movsi directly, otherwise the address is loaded
11532 into a register for CSE. We don't want to CSE these addresses;
11533 instead we CSE addresses from the GOT table, so skip this. */
11534 emit_insn (gen_movsi (reg, new_rtx));
11535 new_rtx = reg;
11536 }
11537 else
11538 {
11539 /* This symbol must be referenced via a load from the
11540 Global Offset Table (@GOT). */
11541
11542 if (reload_in_progress)
11543 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11544 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11545 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11546 if (TARGET_64BIT)
11547 new_rtx = force_reg (Pmode, new_rtx);
11548 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11549 new_rtx = gen_const_mem (Pmode, new_rtx);
11550 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11551
11552 if (reg == 0)
11553 reg = gen_reg_rtx (Pmode);
11554 emit_move_insn (reg, new_rtx);
11555 new_rtx = reg;
11556 }
11557 }
11558 else
11559 {
11560 if (CONST_INT_P (addr)
11561 && !x86_64_immediate_operand (addr, VOIDmode))
11562 {
11563 if (reg)
11564 {
11565 emit_move_insn (reg, addr);
11566 new_rtx = reg;
11567 }
11568 else
11569 new_rtx = force_reg (Pmode, addr);
11570 }
11571 else if (GET_CODE (addr) == CONST)
11572 {
11573 addr = XEXP (addr, 0);
11574
11575 /* We must match stuff we generated before. Assume the only
11576 unspecs that can get here are ours. Not that we could do
11577 anything with them anyway.... */
11578 if (GET_CODE (addr) == UNSPEC
11579 || (GET_CODE (addr) == PLUS
11580 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11581 return orig;
11582 gcc_assert (GET_CODE (addr) == PLUS);
11583 }
11584 if (GET_CODE (addr) == PLUS)
11585 {
11586 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11587
11588 /* Check first to see if this is a constant offset from a @GOTOFF
11589 symbol reference. */
11590 if (gotoff_operand (op0, Pmode)
11591 && CONST_INT_P (op1))
11592 {
11593 if (!TARGET_64BIT)
11594 {
11595 if (reload_in_progress)
11596 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11597 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11598 UNSPEC_GOTOFF);
11599 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11600 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11601 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11602
11603 if (reg != 0)
11604 {
11605 emit_move_insn (reg, new_rtx);
11606 new_rtx = reg;
11607 }
11608 }
11609 else
11610 {
11611 if (INTVAL (op1) < -16*1024*1024
11612 || INTVAL (op1) >= 16*1024*1024)
11613 {
11614 if (!x86_64_immediate_operand (op1, Pmode))
11615 op1 = force_reg (Pmode, op1);
11616 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11617 }
11618 }
11619 }
11620 else
11621 {
11622 base = legitimize_pic_address (XEXP (addr, 0), reg);
11623 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11624 base == reg ? NULL_RTX : reg);
11625
11626 if (CONST_INT_P (new_rtx))
11627 new_rtx = plus_constant (base, INTVAL (new_rtx));
11628 else
11629 {
11630 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11631 {
11632 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11633 new_rtx = XEXP (new_rtx, 1);
11634 }
11635 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
11636 }
11637 }
11638 }
11639 }
11640 return new_rtx;
11641 }
11642 \f
11643 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11644
11645 static rtx
11646 get_thread_pointer (int to_reg)
11647 {
11648 rtx tp, reg, insn;
11649
11650 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11651 if (!to_reg)
11652 return tp;
11653
11654 reg = gen_reg_rtx (Pmode);
11655 insn = gen_rtx_SET (VOIDmode, reg, tp);
11656 insn = emit_insn (insn);
11657
11658 return reg;
11659 }
11660
11661 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11662 false if we expect this to be used for a memory address and true if
11663 we expect to load the address into a register. */
11664
11665 static rtx
11666 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11667 {
11668 rtx dest, base, off, pic, tp;
11669 int type;
11670
11671 switch (model)
11672 {
11673 case TLS_MODEL_GLOBAL_DYNAMIC:
11674 dest = gen_reg_rtx (Pmode);
11675 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11676
11677 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11678 {
11679 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11680
11681 start_sequence ();
11682 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11683 insns = get_insns ();
11684 end_sequence ();
11685
11686 RTL_CONST_CALL_P (insns) = 1;
11687 emit_libcall_block (insns, dest, rax, x);
11688 }
11689 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11690 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11691 else
11692 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11693
11694 if (TARGET_GNU2_TLS)
11695 {
11696 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11697
11698 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11699 }
11700 break;
11701
11702 case TLS_MODEL_LOCAL_DYNAMIC:
11703 base = gen_reg_rtx (Pmode);
11704 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11705
11706 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11707 {
11708 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11709
11710 start_sequence ();
11711 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11712 insns = get_insns ();
11713 end_sequence ();
11714
11715 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11716 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11717 RTL_CONST_CALL_P (insns) = 1;
11718 emit_libcall_block (insns, base, rax, note);
11719 }
11720 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11721 emit_insn (gen_tls_local_dynamic_base_64 (base));
11722 else
11723 emit_insn (gen_tls_local_dynamic_base_32 (base));
11724
11725 if (TARGET_GNU2_TLS)
11726 {
11727 rtx x = ix86_tls_module_base ();
11728
11729 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11730 gen_rtx_MINUS (Pmode, x, tp));
11731 }
11732
11733 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11734 off = gen_rtx_CONST (Pmode, off);
11735
11736 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11737
11738 if (TARGET_GNU2_TLS)
11739 {
11740 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11741
11742 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11743 }
11744
11745 break;
11746
11747 case TLS_MODEL_INITIAL_EXEC:
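/* Load the variable's offset from the thread pointer out of the GOT
   (@gottpoff and friends) and combine it with the thread pointer. */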
11748 if (TARGET_64BIT)
11749 {
11750 pic = NULL;
11751 type = UNSPEC_GOTNTPOFF;
11752 }
11753 else if (flag_pic)
11754 {
11755 if (reload_in_progress)
11756 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11757 pic = pic_offset_table_rtx;
11758 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11759 }
11760 else if (!TARGET_ANY_GNU_TLS)
11761 {
11762 pic = gen_reg_rtx (Pmode);
11763 emit_insn (gen_set_got (pic));
11764 type = UNSPEC_GOTTPOFF;
11765 }
11766 else
11767 {
11768 pic = NULL;
11769 type = UNSPEC_INDNTPOFF;
11770 }
11771
11772 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11773 off = gen_rtx_CONST (Pmode, off);
11774 if (pic)
11775 off = gen_rtx_PLUS (Pmode, pic, off);
11776 off = gen_const_mem (Pmode, off);
11777 set_mem_alias_set (off, ix86_GOT_alias_set ());
11778
11779 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11780 {
11781 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11782 off = force_reg (Pmode, off);
11783 return gen_rtx_PLUS (Pmode, base, off);
11784 }
11785 else
11786 {
11787 base = get_thread_pointer (true);
11788 dest = gen_reg_rtx (Pmode);
11789 emit_insn (gen_subsi3 (dest, base, off));
11790 }
11791 break;
11792
11793 case TLS_MODEL_LOCAL_EXEC:
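/* The offset of the variable from the thread pointer is known at
   link time. For GNU TLS it is a negative offset that is simply added
   to the thread pointer; otherwise the positive @tpoff value is
   subtracted from it. */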
11794 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11795 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11796 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11797 off = gen_rtx_CONST (Pmode, off);
11798
11799 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11800 {
11801 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11802 return gen_rtx_PLUS (Pmode, base, off);
11803 }
11804 else
11805 {
11806 base = get_thread_pointer (true);
11807 dest = gen_reg_rtx (Pmode);
11808 emit_insn (gen_subsi3 (dest, base, off));
11809 }
11810 break;
11811
11812 default:
11813 gcc_unreachable ();
11814 }
11815
11816 return dest;
11817 }
11818
11819 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11820 to symbol DECL. */
11821
11822 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11823 htab_t dllimport_map;
11824
11825 static tree
11826 get_dllimport_decl (tree decl)
11827 {
11828 struct tree_map *h, in;
11829 void **loc;
11830 const char *name;
11831 const char *prefix;
11832 size_t namelen, prefixlen;
11833 char *imp_name;
11834 tree to;
11835 rtx rtl;
11836
11837 if (!dllimport_map)
11838 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11839
11840 in.hash = htab_hash_pointer (decl);
11841 in.base.from = decl;
11842 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11843 h = (struct tree_map *) *loc;
11844 if (h)
11845 return h->to;
11846
11847 *loc = h = ggc_alloc_tree_map ();
11848 h->hash = in.hash;
11849 h->base.from = decl;
11850 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11851 VAR_DECL, NULL, ptr_type_node);
11852 DECL_ARTIFICIAL (to) = 1;
11853 DECL_IGNORED_P (to) = 1;
11854 DECL_EXTERNAL (to) = 1;
11855 TREE_READONLY (to) = 1;
11856
11857 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11858 name = targetm.strip_name_encoding (name);
11859 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11860 ? "*__imp_" : "*__imp__";
11861 namelen = strlen (name);
11862 prefixlen = strlen (prefix);
11863 imp_name = (char *) alloca (namelen + prefixlen + 1);
11864 memcpy (imp_name, prefix, prefixlen);
11865 memcpy (imp_name + prefixlen, name, namelen + 1);
11866
11867 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11868 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11869 SET_SYMBOL_REF_DECL (rtl, to);
11870 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11871
11872 rtl = gen_const_mem (Pmode, rtl);
11873 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11874
11875 SET_DECL_RTL (to, rtl);
11876 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11877
11878 return to;
11879 }
11880
11881 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11882 true if we require the result be a register. */
11883
11884 static rtx
11885 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11886 {
11887 tree imp_decl;
11888 rtx x;
11889
11890 gcc_assert (SYMBOL_REF_DECL (symbol));
11891 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11892
11893 x = DECL_RTL (imp_decl);
11894 if (want_reg)
11895 x = force_reg (Pmode, x);
11896 return x;
11897 }
11898
11899 /* Try machine-dependent ways of modifying an illegitimate address
11900 to be legitimate. If we find one, return the new, valid address.
11901 This macro is used in only one place: `memory_address' in explow.c.
11902
11903 OLDX is the address as it was before break_out_memory_refs was called.
11904 In some cases it is useful to look at this to decide what needs to be done.
11905
11906 It is always safe for this macro to do nothing. It exists to recognize
11907 opportunities to optimize the output.
11908
11909 For the 80386, we handle X+REG by loading X into a register R and
11910 using R+REG. R will go in a general reg and indexing will be used.
11911 However, if REG is a broken-out memory address or multiplication,
11912 nothing needs to be done because REG can certainly go in a general reg.
11913
11914 When -fpic is used, special handling is needed for symbolic references.
11915 See comments by legitimize_pic_address in i386.c for details. */
11916
11917 static rtx
11918 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11919 enum machine_mode mode)
11920 {
11921 int changed = 0;
11922 unsigned log;
11923
11924 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11925 if (log)
11926 return legitimize_tls_address (x, (enum tls_model) log, false);
11927 if (GET_CODE (x) == CONST
11928 && GET_CODE (XEXP (x, 0)) == PLUS
11929 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11930 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11931 {
11932 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11933 (enum tls_model) log, false);
11934 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11935 }
11936
11937 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11938 {
11939 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11940 return legitimize_dllimport_symbol (x, true);
11941 if (GET_CODE (x) == CONST
11942 && GET_CODE (XEXP (x, 0)) == PLUS
11943 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11944 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11945 {
11946 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11947 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11948 }
11949 }
11950
11951 if (flag_pic && SYMBOLIC_CONST (x))
11952 return legitimize_pic_address (x, 0);
11953
11954 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
11955 if (GET_CODE (x) == ASHIFT
11956 && CONST_INT_P (XEXP (x, 1))
11957 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11958 {
11959 changed = 1;
11960 log = INTVAL (XEXP (x, 1));
11961 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11962 GEN_INT (1 << log));
11963 }
11964
11965 if (GET_CODE (x) == PLUS)
11966 {
11967 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11968
11969 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11970 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11971 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11972 {
11973 changed = 1;
11974 log = INTVAL (XEXP (XEXP (x, 0), 1));
11975 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11976 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11977 GEN_INT (1 << log));
11978 }
11979
11980 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11981 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11982 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11983 {
11984 changed = 1;
11985 log = INTVAL (XEXP (XEXP (x, 1), 1));
11986 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11987 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11988 GEN_INT (1 << log));
11989 }
11990
11991 /* Put multiply first if it isn't already. */
11992 if (GET_CODE (XEXP (x, 1)) == MULT)
11993 {
11994 rtx tmp = XEXP (x, 0);
11995 XEXP (x, 0) = XEXP (x, 1);
11996 XEXP (x, 1) = tmp;
11997 changed = 1;
11998 }
11999
12000 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12001 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12002 created by virtual register instantiation, register elimination, and
12003 similar optimizations. */
12004 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12005 {
12006 changed = 1;
12007 x = gen_rtx_PLUS (Pmode,
12008 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12009 XEXP (XEXP (x, 1), 0)),
12010 XEXP (XEXP (x, 1), 1));
12011 }
12012
12013 /* Canonicalize
12014 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12015 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12016 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12017 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12018 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12019 && CONSTANT_P (XEXP (x, 1)))
12020 {
12021 rtx constant;
12022 rtx other = NULL_RTX;
12023
12024 if (CONST_INT_P (XEXP (x, 1)))
12025 {
12026 constant = XEXP (x, 1);
12027 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12028 }
12029 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12030 {
12031 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12032 other = XEXP (x, 1);
12033 }
12034 else
12035 constant = 0;
12036
12037 if (constant)
12038 {
12039 changed = 1;
12040 x = gen_rtx_PLUS (Pmode,
12041 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12042 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12043 plus_constant (other, INTVAL (constant)));
12044 }
12045 }
12046
12047 if (changed && ix86_legitimate_address_p (mode, x, false))
12048 return x;
12049
12050 if (GET_CODE (XEXP (x, 0)) == MULT)
12051 {
12052 changed = 1;
12053 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12054 }
12055
12056 if (GET_CODE (XEXP (x, 1)) == MULT)
12057 {
12058 changed = 1;
12059 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12060 }
12061
12062 if (changed
12063 && REG_P (XEXP (x, 1))
12064 && REG_P (XEXP (x, 0)))
12065 return x;
12066
12067 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12068 {
12069 changed = 1;
12070 x = legitimize_pic_address (x, 0);
12071 }
12072
12073 if (changed && ix86_legitimate_address_p (mode, x, false))
12074 return x;
12075
12076 if (REG_P (XEXP (x, 0)))
12077 {
12078 rtx temp = gen_reg_rtx (Pmode);
12079 rtx val = force_operand (XEXP (x, 1), temp);
12080 if (val != temp)
12081 emit_move_insn (temp, val);
12082
12083 XEXP (x, 1) = temp;
12084 return x;
12085 }
12086
12087 else if (REG_P (XEXP (x, 1)))
12088 {
12089 rtx temp = gen_reg_rtx (Pmode);
12090 rtx val = force_operand (XEXP (x, 0), temp);
12091 if (val != temp)
12092 emit_move_insn (temp, val);
12093
12094 XEXP (x, 0) = temp;
12095 return x;
12096 }
12097 }
12098
12099 return x;
12100 }
12101 \f
12102 /* Print an integer constant expression in assembler syntax. Addition
12103 and subtraction are the only arithmetic that may appear in these
12104 expressions. FILE is the stdio stream to write to, X is the rtx, and
12105 CODE is the operand print code from the output string. */
12106
12107 static void
12108 output_pic_addr_const (FILE *file, rtx x, int code)
12109 {
12110 char buf[256];
12111
12112 switch (GET_CODE (x))
12113 {
12114 case PC:
12115 gcc_assert (flag_pic);
12116 putc ('.', file);
12117 break;
12118
12119 case SYMBOL_REF:
12120 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12121 output_addr_const (file, x);
12122 else
12123 {
12124 const char *name = XSTR (x, 0);
12125
12126 /* Mark the decl as referenced so that cgraph will
12127 output the function. */
12128 if (SYMBOL_REF_DECL (x))
12129 mark_decl_referenced (SYMBOL_REF_DECL (x));
12130
12131 #if TARGET_MACHO
12132 if (MACHOPIC_INDIRECT
12133 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12134 name = machopic_indirection_name (x, /*stub_p=*/true);
12135 #endif
12136 assemble_name (file, name);
12137 }
12138 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12139 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12140 fputs ("@PLT", file);
12141 break;
12142
12143 case LABEL_REF:
12144 x = XEXP (x, 0);
12145 /* FALLTHRU */
12146 case CODE_LABEL:
12147 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12148 assemble_name (asm_out_file, buf);
12149 break;
12150
12151 case CONST_INT:
12152 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12153 break;
12154
12155 case CONST:
12156 /* This used to output parentheses around the expression,
12157 but that does not work on the 386 (either ATT or BSD assembler). */
12158 output_pic_addr_const (file, XEXP (x, 0), code);
12159 break;
12160
12161 case CONST_DOUBLE:
12162 if (GET_MODE (x) == VOIDmode)
12163 {
12164 /* We can use %d if the number is <32 bits and positive. */
12165 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12166 fprintf (file, "0x%lx%08lx",
12167 (unsigned long) CONST_DOUBLE_HIGH (x),
12168 (unsigned long) CONST_DOUBLE_LOW (x));
12169 else
12170 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12171 }
12172 else
12173 /* We can't handle floating point constants;
12174 TARGET_PRINT_OPERAND must handle them. */
12175 output_operand_lossage ("floating constant misused");
12176 break;
12177
12178 case PLUS:
12179 /* Some assemblers need integer constants to appear first. */
12180 if (CONST_INT_P (XEXP (x, 0)))
12181 {
12182 output_pic_addr_const (file, XEXP (x, 0), code);
12183 putc ('+', file);
12184 output_pic_addr_const (file, XEXP (x, 1), code);
12185 }
12186 else
12187 {
12188 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12189 output_pic_addr_const (file, XEXP (x, 1), code);
12190 putc ('+', file);
12191 output_pic_addr_const (file, XEXP (x, 0), code);
12192 }
12193 break;
12194
12195 case MINUS:
12196 if (!TARGET_MACHO)
12197 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12198 output_pic_addr_const (file, XEXP (x, 0), code);
12199 putc ('-', file);
12200 output_pic_addr_const (file, XEXP (x, 1), code);
12201 if (!TARGET_MACHO)
12202 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12203 break;
12204
12205 case UNSPEC:
12206 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12207 {
12208 bool f = i386_asm_output_addr_const_extra (file, x);
12209 gcc_assert (f);
12210 break;
12211 }
12212
12213 gcc_assert (XVECLEN (x, 0) == 1);
12214 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12215 switch (XINT (x, 1))
12216 {
12217 case UNSPEC_GOT:
12218 fputs ("@GOT", file);
12219 break;
12220 case UNSPEC_GOTOFF:
12221 fputs ("@GOTOFF", file);
12222 break;
12223 case UNSPEC_PLTOFF:
12224 fputs ("@PLTOFF", file);
12225 break;
12226 case UNSPEC_GOTPCREL:
12227 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12228 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12229 break;
12230 case UNSPEC_GOTTPOFF:
12231 /* FIXME: This might be @TPOFF in Sun ld too. */
12232 fputs ("@gottpoff", file);
12233 break;
12234 case UNSPEC_TPOFF:
12235 fputs ("@tpoff", file);
12236 break;
12237 case UNSPEC_NTPOFF:
12238 if (TARGET_64BIT)
12239 fputs ("@tpoff", file);
12240 else
12241 fputs ("@ntpoff", file);
12242 break;
12243 case UNSPEC_DTPOFF:
12244 fputs ("@dtpoff", file);
12245 break;
12246 case UNSPEC_GOTNTPOFF:
12247 if (TARGET_64BIT)
12248 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12249 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12250 else
12251 fputs ("@gotntpoff", file);
12252 break;
12253 case UNSPEC_INDNTPOFF:
12254 fputs ("@indntpoff", file);
12255 break;
12256 #if TARGET_MACHO
12257 case UNSPEC_MACHOPIC_OFFSET:
12258 putc ('-', file);
12259 machopic_output_function_base_name (file);
12260 break;
12261 #endif
12262 default:
12263 output_operand_lossage ("invalid UNSPEC as operand");
12264 break;
12265 }
12266 break;
12267
12268 default:
12269 output_operand_lossage ("invalid expression as operand");
12270 }
12271 }
12272
12273 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12274 We need to emit DTP-relative relocations. */
12275
12276 static void ATTRIBUTE_UNUSED
12277 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12278 {
12279 fputs (ASM_LONG, file);
12280 output_addr_const (file, x);
12281 fputs ("@dtpoff", file);
12282 switch (size)
12283 {
12284 case 4:
12285 break;
12286 case 8:
12287 fputs (", 0", file);
12288 break;
12289 default:
12290 gcc_unreachable ();
12291 }
12292 }
12293
12294 /* Return true if X is a representation of the PIC register. This copes
12295 with calls from ix86_find_base_term, where the register might have
12296 been replaced by a cselib value. */
12297
12298 static bool
12299 ix86_pic_register_p (rtx x)
12300 {
12301 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12302 return (pic_offset_table_rtx
12303 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12304 else
12305 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12306 }
12307
12308 /* Helper function for ix86_delegitimize_address.
12309 Attempt to delegitimize TLS local-exec accesses. */
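/* Illustrative note (a sketch, not part of the original sources): on 32-bit
   targets a local-exec TLS load is typically emitted as

       movl %gs:foo@ntpoff, %eax

   whose address is represented roughly as

       (plus (reg) (const (unspec [(symbol_ref "foo")] UNSPEC_NTPOFF)))

   with addr.seg == SEG_GS (SEG_FS in 64-bit mode).  The routine below strips
   the unspec wrapper and reconstructs a plain reference to "foo", adding back
   any base, index and constant displacement terms.  */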
12310
12311 static rtx
12312 ix86_delegitimize_tls_address (rtx orig_x)
12313 {
12314 rtx x = orig_x, unspec;
12315 struct ix86_address addr;
12316
12317 if (!TARGET_TLS_DIRECT_SEG_REFS)
12318 return orig_x;
12319 if (MEM_P (x))
12320 x = XEXP (x, 0);
12321 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12322 return orig_x;
12323 if (ix86_decompose_address (x, &addr) == 0
12324 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12325 || addr.disp == NULL_RTX
12326 || GET_CODE (addr.disp) != CONST)
12327 return orig_x;
12328 unspec = XEXP (addr.disp, 0);
12329 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12330 unspec = XEXP (unspec, 0);
12331 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12332 return orig_x;
12333 x = XVECEXP (unspec, 0, 0);
12334 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12335 if (unspec != XEXP (addr.disp, 0))
12336 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12337 if (addr.index)
12338 {
12339 rtx idx = addr.index;
12340 if (addr.scale != 1)
12341 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12342 x = gen_rtx_PLUS (Pmode, idx, x);
12343 }
12344 if (addr.base)
12345 x = gen_rtx_PLUS (Pmode, addr.base, x);
12346 if (MEM_P (orig_x))
12347 x = replace_equiv_address_nv (orig_x, x);
12348 return x;
12349 }
12350
12351 /* In the name of slightly smaller debug output, and to cater to
12352 general assembler lossage, recognize PIC+GOTOFF and turn it back
12353 into a direct symbol reference.
12354
12355 On Darwin, this is necessary to avoid a crash, because Darwin
12356 has a different PIC label for each routine but the DWARF debugging
12357 information is not associated with any particular routine, so it's
12358 necessary to remove references to the PIC label from RTL stored by
12359 the DWARF output code. */
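/* Illustrative note (a sketch, not part of the original sources): a 32-bit
   PIC reference such as

       movl foo@GOTOFF(%ebx), %eax

   reaches the debug output code as something like

       (mem (plus (reg:SI ebx)
                  (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))))

   and the routine below folds it back into a direct reference to "foo",
   re-applying any constant or register addends that were present.  */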
12360
12361 static rtx
12362 ix86_delegitimize_address (rtx x)
12363 {
12364 rtx orig_x = delegitimize_mem_from_attrs (x);
12365 /* addend is NULL or some rtx if x is something+GOTOFF where
12366 something doesn't include the PIC register. */
12367 rtx addend = NULL_RTX;
12368 /* reg_addend is NULL or a multiple of some register. */
12369 rtx reg_addend = NULL_RTX;
12370 /* const_addend is NULL or a const_int. */
12371 rtx const_addend = NULL_RTX;
12372 /* This is the result, or NULL. */
12373 rtx result = NULL_RTX;
12374
12375 x = orig_x;
12376
12377 if (MEM_P (x))
12378 x = XEXP (x, 0);
12379
12380 if (TARGET_64BIT)
12381 {
12382 if (GET_CODE (x) != CONST
12383 || GET_CODE (XEXP (x, 0)) != UNSPEC
12384 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
12385 || !MEM_P (orig_x))
12386 return ix86_delegitimize_tls_address (orig_x);
12387 x = XVECEXP (XEXP (x, 0), 0, 0);
12388 if (GET_MODE (orig_x) != Pmode)
12389 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
12390 return x;
12391 }
12392
12393 if (GET_CODE (x) != PLUS
12394 || GET_CODE (XEXP (x, 1)) != CONST)
12395 return ix86_delegitimize_tls_address (orig_x);
12396
12397 if (ix86_pic_register_p (XEXP (x, 0)))
12398 /* %ebx + GOT/GOTOFF */
12399 ;
12400 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12401 {
12402 /* %ebx + %reg * scale + GOT/GOTOFF */
12403 reg_addend = XEXP (x, 0);
12404 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12405 reg_addend = XEXP (reg_addend, 1);
12406 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12407 reg_addend = XEXP (reg_addend, 0);
12408 else
12409 {
12410 reg_addend = NULL_RTX;
12411 addend = XEXP (x, 0);
12412 }
12413 }
12414 else
12415 addend = XEXP (x, 0);
12416
12417 x = XEXP (XEXP (x, 1), 0);
12418 if (GET_CODE (x) == PLUS
12419 && CONST_INT_P (XEXP (x, 1)))
12420 {
12421 const_addend = XEXP (x, 1);
12422 x = XEXP (x, 0);
12423 }
12424
12425 if (GET_CODE (x) == UNSPEC
12426 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12427 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
12428 result = XVECEXP (x, 0, 0);
12429
12430 if (TARGET_MACHO && darwin_local_data_pic (x)
12431 && !MEM_P (orig_x))
12432 result = XVECEXP (x, 0, 0);
12433
12434 if (! result)
12435 return ix86_delegitimize_tls_address (orig_x);
12436
12437 if (const_addend)
12438 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12439 if (reg_addend)
12440 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12441 if (addend)
12442 {
12443 /* If the rest of original X doesn't involve the PIC register, add
12444 addend and subtract pic_offset_table_rtx. This can happen e.g.
12445 for code like:
12446 leal (%ebx, %ecx, 4), %ecx
12447 ...
12448 movl foo@GOTOFF(%ecx), %edx
12449 in which case we return (%ecx - %ebx) + foo. */
12450 if (pic_offset_table_rtx)
12451 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12452 pic_offset_table_rtx),
12453 result);
12454 else
12455 return orig_x;
12456 }
12457 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12458 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12459 return result;
12460 }
12461
12462 /* If X is a machine specific address (i.e. a symbol or label being
12463 referenced as a displacement from the GOT implemented using an
12464 UNSPEC), then return the base term. Otherwise return X. */
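/* Illustrative note (a sketch, not part of the original sources): in 64-bit
   mode a GOT-relative reference such as

       (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL))

   has "foo" as its base term, which is what the alias machinery wants;
   anything else is handed to ix86_delegitimize_address.  */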
12465
12466 rtx
12467 ix86_find_base_term (rtx x)
12468 {
12469 rtx term;
12470
12471 if (TARGET_64BIT)
12472 {
12473 if (GET_CODE (x) != CONST)
12474 return x;
12475 term = XEXP (x, 0);
12476 if (GET_CODE (term) == PLUS
12477 && (CONST_INT_P (XEXP (term, 1))
12478 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12479 term = XEXP (term, 0);
12480 if (GET_CODE (term) != UNSPEC
12481 || XINT (term, 1) != UNSPEC_GOTPCREL)
12482 return x;
12483
12484 return XVECEXP (term, 0, 0);
12485 }
12486
12487 return ix86_delegitimize_address (x);
12488 }
12489 \f
12490 static void
12491 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
12492 int fp, FILE *file)
12493 {
12494 const char *suffix;
12495
12496 if (mode == CCFPmode || mode == CCFPUmode)
12497 {
12498 code = ix86_fp_compare_code_to_integer (code);
12499 mode = CCmode;
12500 }
12501 if (reverse)
12502 code = reverse_condition (code);
12503
12504 switch (code)
12505 {
12506 case EQ:
12507 switch (mode)
12508 {
12509 case CCAmode:
12510 suffix = "a";
12511 break;
12512
12513 case CCCmode:
12514 suffix = "c";
12515 break;
12516
12517 case CCOmode:
12518 suffix = "o";
12519 break;
12520
12521 case CCSmode:
12522 suffix = "s";
12523 break;
12524
12525 default:
12526 suffix = "e";
12527 }
12528 break;
12529 case NE:
12530 switch (mode)
12531 {
12532 case CCAmode:
12533 suffix = "na";
12534 break;
12535
12536 case CCCmode:
12537 suffix = "nc";
12538 break;
12539
12540 case CCOmode:
12541 suffix = "no";
12542 break;
12543
12544 case CCSmode:
12545 suffix = "ns";
12546 break;
12547
12548 default:
12549 suffix = "ne";
12550 }
12551 break;
12552 case GT:
12553 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12554 suffix = "g";
12555 break;
12556 case GTU:
12557 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12558 Those same assemblers have the same but opposite lossage on cmov. */
12559 if (mode == CCmode)
12560 suffix = fp ? "nbe" : "a";
12561 else if (mode == CCCmode)
12562 suffix = "b";
12563 else
12564 gcc_unreachable ();
12565 break;
12566 case LT:
12567 switch (mode)
12568 {
12569 case CCNOmode:
12570 case CCGOCmode:
12571 suffix = "s";
12572 break;
12573
12574 case CCmode:
12575 case CCGCmode:
12576 suffix = "l";
12577 break;
12578
12579 default:
12580 gcc_unreachable ();
12581 }
12582 break;
12583 case LTU:
12584 gcc_assert (mode == CCmode || mode == CCCmode);
12585 suffix = "b";
12586 break;
12587 case GE:
12588 switch (mode)
12589 {
12590 case CCNOmode:
12591 case CCGOCmode:
12592 suffix = "ns";
12593 break;
12594
12595 case CCmode:
12596 case CCGCmode:
12597 suffix = "ge";
12598 break;
12599
12600 default:
12601 gcc_unreachable ();
12602 }
12603 break;
12604 case GEU:
12605 /* ??? As above. */
12606 gcc_assert (mode == CCmode || mode == CCCmode);
12607 suffix = fp ? "nb" : "ae";
12608 break;
12609 case LE:
12610 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12611 suffix = "le";
12612 break;
12613 case LEU:
12614 /* ??? As above. */
12615 if (mode == CCmode)
12616 suffix = "be";
12617 else if (mode == CCCmode)
12618 suffix = fp ? "nb" : "ae";
12619 else
12620 gcc_unreachable ();
12621 break;
12622 case UNORDERED:
12623 suffix = fp ? "u" : "p";
12624 break;
12625 case ORDERED:
12626 suffix = fp ? "nu" : "np";
12627 break;
12628 default:
12629 gcc_unreachable ();
12630 }
12631 fputs (suffix, file);
12632 }
12633
12634 /* Print the name of register X to FILE based on its machine mode and number.
12635 If CODE is 'w', pretend the mode is HImode.
12636 If CODE is 'b', pretend the mode is QImode.
12637 If CODE is 'k', pretend the mode is SImode.
12638 If CODE is 'q', pretend the mode is DImode.
12639 If CODE is 'x', pretend the mode is V4SFmode.
12640 If CODE is 't', pretend the mode is V8SFmode.
12641 If CODE is 'h', pretend the reg is the 'high' byte register.
12642 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12643 If CODE is 'd', duplicate the operand for AVX instruction.
12644 */
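/* Illustrative note (a sketch, not part of the original sources): for
   (reg:SI 0), i.e. %eax, AT&T output is "%eax" with no code, "%ax" with
   code 'w', "%al" with code 'b' and "%rax" with code 'q'; for an AMD
   extended register such as r8 the output is "%r8b", "%r8w", "%r8d" or
   "%r8" depending on the requested width.  */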
12645
12646 void
12647 print_reg (rtx x, int code, FILE *file)
12648 {
12649 const char *reg;
12650 bool duplicated = code == 'd' && TARGET_AVX;
12651
12652 gcc_assert (x == pc_rtx
12653 || (REGNO (x) != ARG_POINTER_REGNUM
12654 && REGNO (x) != FRAME_POINTER_REGNUM
12655 && REGNO (x) != FLAGS_REG
12656 && REGNO (x) != FPSR_REG
12657 && REGNO (x) != FPCR_REG));
12658
12659 if (ASSEMBLER_DIALECT == ASM_ATT)
12660 putc ('%', file);
12661
12662 if (x == pc_rtx)
12663 {
12664 gcc_assert (TARGET_64BIT);
12665 fputs ("rip", file);
12666 return;
12667 }
12668
12669 if (code == 'w' || MMX_REG_P (x))
12670 code = 2;
12671 else if (code == 'b')
12672 code = 1;
12673 else if (code == 'k')
12674 code = 4;
12675 else if (code == 'q')
12676 code = 8;
12677 else if (code == 'y')
12678 code = 3;
12679 else if (code == 'h')
12680 code = 0;
12681 else if (code == 'x')
12682 code = 16;
12683 else if (code == 't')
12684 code = 32;
12685 else
12686 code = GET_MODE_SIZE (GET_MODE (x));
12687
12688 /* Irritatingly, the AMD extended registers use a different naming
12689 convention from the normal registers. */
12690 if (REX_INT_REG_P (x))
12691 {
12692 gcc_assert (TARGET_64BIT);
12693 switch (code)
12694 {
12695 case 0:
12696 error ("extended registers have no high halves");
12697 break;
12698 case 1:
12699 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12700 break;
12701 case 2:
12702 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12703 break;
12704 case 4:
12705 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12706 break;
12707 case 8:
12708 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12709 break;
12710 default:
12711 error ("unsupported operand size for extended register");
12712 break;
12713 }
12714 return;
12715 }
12716
12717 reg = NULL;
12718 switch (code)
12719 {
12720 case 3:
12721 if (STACK_TOP_P (x))
12722 {
12723 reg = "st(0)";
12724 break;
12725 }
12726 /* FALLTHRU */
12727 case 8:
12728 case 4:
12729 case 12:
12730 if (! ANY_FP_REG_P (x))
12731 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12732 /* FALLTHRU */
12733 case 16:
12734 case 2:
12735 normal:
12736 reg = hi_reg_name[REGNO (x)];
12737 break;
12738 case 1:
12739 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12740 goto normal;
12741 reg = qi_reg_name[REGNO (x)];
12742 break;
12743 case 0:
12744 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12745 goto normal;
12746 reg = qi_high_reg_name[REGNO (x)];
12747 break;
12748 case 32:
12749 if (SSE_REG_P (x))
12750 {
12751 gcc_assert (!duplicated);
12752 putc ('y', file);
12753 fputs (hi_reg_name[REGNO (x)] + 1, file);
12754 return;
12755 }
12756 break;
12757 default:
12758 gcc_unreachable ();
12759 }
12760
12761 fputs (reg, file);
12762 if (duplicated)
12763 {
12764 if (ASSEMBLER_DIALECT == ASM_ATT)
12765 fprintf (file, ", %%%s", reg);
12766 else
12767 fprintf (file, ", %s", reg);
12768 }
12769 }
12770
12771 /* Locate some local-dynamic symbol still in use by this function
12772 so that we can print its name in some tls_local_dynamic_base
12773 pattern. */
12774
12775 static int
12776 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12777 {
12778 rtx x = *px;
12779
12780 if (GET_CODE (x) == SYMBOL_REF
12781 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12782 {
12783 cfun->machine->some_ld_name = XSTR (x, 0);
12784 return 1;
12785 }
12786
12787 return 0;
12788 }
12789
12790 static const char *
12791 get_some_local_dynamic_name (void)
12792 {
12793 rtx insn;
12794
12795 if (cfun->machine->some_ld_name)
12796 return cfun->machine->some_ld_name;
12797
12798 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12799 if (NONDEBUG_INSN_P (insn)
12800 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12801 return cfun->machine->some_ld_name;
12802
12803 return NULL;
12804 }
12805
12806 /* Meaning of CODE:
12807 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12808 C -- print opcode suffix for set/cmov insn.
12809 c -- like C, but print reversed condition
12810 F,f -- likewise, but for floating-point.
12811 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12812 otherwise nothing
12813 R -- print the prefix for register names.
12814 z -- print the opcode suffix for the size of the current operand.
12815 Z -- likewise, with special suffixes for x87 instructions.
12816 * -- print a star (in certain assembler syntax)
12817 A -- print an absolute memory reference.
12818 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12819 s -- print a shift double count, followed by the assembler's argument
12820 delimiter.
12821 b -- print the QImode name of the register for the indicated operand.
12822 %b0 would print %al if operands[0] is reg 0.
12823 w -- likewise, print the HImode name of the register.
12824 k -- likewise, print the SImode name of the register.
12825 q -- likewise, print the DImode name of the register.
12826 x -- likewise, print the V4SFmode name of the register.
12827 t -- likewise, print the V8SFmode name of the register.
12828 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12829 y -- print "st(0)" instead of "st" as a register.
12830 d -- print duplicated register operand for AVX instruction.
12831 D -- print condition for SSE cmp instruction.
12832 P -- if PIC, print an @PLT suffix.
12833 X -- don't print any sort of PIC '@' suffix for a symbol.
12834 & -- print some in-use local-dynamic symbol name.
12835 H -- print a memory address offset by 8; used for sse high-parts
12836 Y -- print condition for XOP pcom* instruction.
12837 + -- print a branch hint as 'cs' or 'ds' prefix
12838 ; -- print a semicolon (after prefixes due to bug in older gas).
12839 @ -- print a segment register of thread base pointer load
12840 */
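/* Illustrative note (a sketch, not part of the original sources): in an
   output template such as "mov%z0\t{%1, %0|%0, %1}", "%z0" expands to the
   size suffix matching operand 0 ('b', 'w', 'l' or 'q' in AT&T syntax,
   nothing in Intel syntax), while "%k1" prints the SImode name of the
   register in operand 1, e.g. "%eax".  */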
12841
12842 void
12843 ix86_print_operand (FILE *file, rtx x, int code)
12844 {
12845 if (code)
12846 {
12847 switch (code)
12848 {
12849 case '*':
12850 if (ASSEMBLER_DIALECT == ASM_ATT)
12851 putc ('*', file);
12852 return;
12853
12854 case '&':
12855 {
12856 const char *name = get_some_local_dynamic_name ();
12857 if (name == NULL)
12858 output_operand_lossage ("'%%&' used without any "
12859 "local dynamic TLS references");
12860 else
12861 assemble_name (file, name);
12862 return;
12863 }
12864
12865 case 'A':
12866 switch (ASSEMBLER_DIALECT)
12867 {
12868 case ASM_ATT:
12869 putc ('*', file);
12870 break;
12871
12872 case ASM_INTEL:
12873 /* Intel syntax. For absolute addresses, registers should not
12874 be surrounded by brackets. */
12875 if (!REG_P (x))
12876 {
12877 putc ('[', file);
12878 ix86_print_operand (file, x, 0);
12879 putc (']', file);
12880 return;
12881 }
12882 break;
12883
12884 default:
12885 gcc_unreachable ();
12886 }
12887
12888 ix86_print_operand (file, x, 0);
12889 return;
12890
12891
12892 case 'L':
12893 if (ASSEMBLER_DIALECT == ASM_ATT)
12894 putc ('l', file);
12895 return;
12896
12897 case 'W':
12898 if (ASSEMBLER_DIALECT == ASM_ATT)
12899 putc ('w', file);
12900 return;
12901
12902 case 'B':
12903 if (ASSEMBLER_DIALECT == ASM_ATT)
12904 putc ('b', file);
12905 return;
12906
12907 case 'Q':
12908 if (ASSEMBLER_DIALECT == ASM_ATT)
12909 putc ('l', file);
12910 return;
12911
12912 case 'S':
12913 if (ASSEMBLER_DIALECT == ASM_ATT)
12914 putc ('s', file);
12915 return;
12916
12917 case 'T':
12918 if (ASSEMBLER_DIALECT == ASM_ATT)
12919 putc ('t', file);
12920 return;
12921
12922 case 'z':
12923 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12924 {
12925 /* Opcodes don't get size suffixes when using Intel syntax. */
12926 if (ASSEMBLER_DIALECT == ASM_INTEL)
12927 return;
12928
12929 switch (GET_MODE_SIZE (GET_MODE (x)))
12930 {
12931 case 1:
12932 putc ('b', file);
12933 return;
12934
12935 case 2:
12936 putc ('w', file);
12937 return;
12938
12939 case 4:
12940 putc ('l', file);
12941 return;
12942
12943 case 8:
12944 putc ('q', file);
12945 return;
12946
12947 default:
12948 output_operand_lossage
12949 ("invalid operand size for operand code '%c'", code);
12950 return;
12951 }
12952 }
12953
12954 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12955 warning
12956 (0, "non-integer operand used with operand code '%c'", code);
12957 /* FALLTHRU */
12958
12959 case 'Z':
12960 /* 387 opcodes don't get size suffixes when using Intel syntax. */
12961 if (ASSEMBLER_DIALECT == ASM_INTEL)
12962 return;
12963
12964 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12965 {
12966 switch (GET_MODE_SIZE (GET_MODE (x)))
12967 {
12968 case 2:
12969 #ifdef HAVE_AS_IX86_FILDS
12970 putc ('s', file);
12971 #endif
12972 return;
12973
12974 case 4:
12975 putc ('l', file);
12976 return;
12977
12978 case 8:
12979 #ifdef HAVE_AS_IX86_FILDQ
12980 putc ('q', file);
12981 #else
12982 fputs ("ll", file);
12983 #endif
12984 return;
12985
12986 default:
12987 break;
12988 }
12989 }
12990 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12991 {
12992 /* 387 opcodes don't get size suffixes
12993 if the operands are registers. */
12994 if (STACK_REG_P (x))
12995 return;
12996
12997 switch (GET_MODE_SIZE (GET_MODE (x)))
12998 {
12999 case 4:
13000 putc ('s', file);
13001 return;
13002
13003 case 8:
13004 putc ('l', file);
13005 return;
13006
13007 case 12:
13008 case 16:
13009 putc ('t', file);
13010 return;
13011
13012 default:
13013 break;
13014 }
13015 }
13016 else
13017 {
13018 output_operand_lossage
13019 ("invalid operand type used with operand code '%c'", code);
13020 return;
13021 }
13022
13023 output_operand_lossage
13024 ("invalid operand size for operand code '%c'", code);
13025 return;
13026
13027 case 'd':
13028 case 'b':
13029 case 'w':
13030 case 'k':
13031 case 'q':
13032 case 'h':
13033 case 't':
13034 case 'y':
13035 case 'x':
13036 case 'X':
13037 case 'P':
13038 break;
13039
13040 case 's':
13041 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13042 {
13043 ix86_print_operand (file, x, 0);
13044 fputs (", ", file);
13045 }
13046 return;
13047
13048 case 'D':
13049 /* A little bit of braindamage here: the SSE compare instructions
13050 use completely different names for the comparisons than the
13051 fp conditional moves do. */
13052 if (TARGET_AVX)
13053 {
13054 switch (GET_CODE (x))
13055 {
13056 case EQ:
13057 fputs ("eq", file);
13058 break;
13059 case UNEQ:
13060 fputs ("eq_us", file);
13061 break;
13062 case LT:
13063 fputs ("lt", file);
13064 break;
13065 case UNLT:
13066 fputs ("nge", file);
13067 break;
13068 case LE:
13069 fputs ("le", file);
13070 break;
13071 case UNLE:
13072 fputs ("ngt", file);
13073 break;
13074 case UNORDERED:
13075 fputs ("unord", file);
13076 break;
13077 case NE:
13078 fputs ("neq", file);
13079 break;
13080 case LTGT:
13081 fputs ("neq_oq", file);
13082 break;
13083 case GE:
13084 fputs ("ge", file);
13085 break;
13086 case UNGE:
13087 fputs ("nlt", file);
13088 break;
13089 case GT:
13090 fputs ("gt", file);
13091 break;
13092 case UNGT:
13093 fputs ("nle", file);
13094 break;
13095 case ORDERED:
13096 fputs ("ord", file);
13097 break;
13098 default:
13099 output_operand_lossage ("operand is not a condition code, "
13100 "invalid operand code 'D'");
13101 return;
13102 }
13103 }
13104 else
13105 {
13106 switch (GET_CODE (x))
13107 {
13108 case EQ:
13109 case UNEQ:
13110 fputs ("eq", file);
13111 break;
13112 case LT:
13113 case UNLT:
13114 fputs ("lt", file);
13115 break;
13116 case LE:
13117 case UNLE:
13118 fputs ("le", file);
13119 break;
13120 case UNORDERED:
13121 fputs ("unord", file);
13122 break;
13123 case NE:
13124 case LTGT:
13125 fputs ("neq", file);
13126 break;
13127 case UNGE:
13128 case GE:
13129 fputs ("nlt", file);
13130 break;
13131 case UNGT:
13132 case GT:
13133 fputs ("nle", file);
13134 break;
13135 case ORDERED:
13136 fputs ("ord", file);
13137 break;
13138 default:
13139 output_operand_lossage ("operand is not a condition code, "
13140 "invalid operand code 'D'");
13141 return;
13142 }
13143 }
13144 return;
13145 case 'O':
13146 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13147 if (ASSEMBLER_DIALECT == ASM_ATT)
13148 {
13149 switch (GET_MODE (x))
13150 {
13151 case HImode: putc ('w', file); break;
13152 case SImode:
13153 case SFmode: putc ('l', file); break;
13154 case DImode:
13155 case DFmode: putc ('q', file); break;
13156 default: gcc_unreachable ();
13157 }
13158 putc ('.', file);
13159 }
13160 #endif
13161 return;
13162 case 'C':
13163 if (!COMPARISON_P (x))
13164 {
13165 output_operand_lossage ("operand is neither a constant nor a "
13166 "condition code, invalid operand code "
13167 "'C'");
13168 return;
13169 }
13170 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13171 return;
13172 case 'F':
13173 if (!COMPARISON_P (x))
13174 {
13175 output_operand_lossage ("operand is neither a constant nor a "
13176 "condition code, invalid operand code "
13177 "'F'");
13178 return;
13179 }
13180 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13181 if (ASSEMBLER_DIALECT == ASM_ATT)
13182 putc ('.', file);
13183 #endif
13184 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13185 return;
13186
13187 /* Like above, but reverse condition */
13188 case 'c':
13189 /* Check to see if argument to %c is really a constant
13190 and not a condition code which needs to be reversed. */
13191 if (!COMPARISON_P (x))
13192 {
13193 output_operand_lossage ("operand is neither a constant nor a "
13194 "condition code, invalid operand "
13195 "code 'c'");
13196 return;
13197 }
13198 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13199 return;
13200 case 'f':
13201 if (!COMPARISON_P (x))
13202 {
13203 output_operand_lossage ("operand is neither a constant nor a "
13204 "condition code, invalid operand "
13205 "code 'f'");
13206 return;
13207 }
13208 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13209 if (ASSEMBLER_DIALECT == ASM_ATT)
13210 putc ('.', file);
13211 #endif
13212 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13213 return;
13214
13215 case 'H':
13216 /* It doesn't actually matter what mode we use here, as we're
13217 only going to use this for printing. */
13218 x = adjust_address_nv (x, DImode, 8);
13219 break;
13220
13221 case '+':
13222 {
13223 rtx x;
13224
13225 if (!optimize
13226 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13227 return;
13228
13229 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13230 if (x)
13231 {
13232 int pred_val = INTVAL (XEXP (x, 0));
13233
13234 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13235 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13236 {
13237 int taken = pred_val > REG_BR_PROB_BASE / 2;
13238 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13239
13240 /* Emit hints only in cases where the default branch prediction
13241 heuristics would fail. */
13242 if (taken != cputaken)
13243 {
13244 /* We use 3e (DS) prefix for taken branches and
13245 2e (CS) prefix for not taken branches. */
13246 if (taken)
13247 fputs ("ds ; ", file);
13248 else
13249 fputs ("cs ; ", file);
13250 }
13251 }
13252 }
13253 return;
13254 }
13255
13256 case 'Y':
13257 switch (GET_CODE (x))
13258 {
13259 case NE:
13260 fputs ("neq", file);
13261 break;
13262 case EQ:
13263 fputs ("eq", file);
13264 break;
13265 case GE:
13266 case GEU:
13267 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13268 break;
13269 case GT:
13270 case GTU:
13271 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13272 break;
13273 case LE:
13274 case LEU:
13275 fputs ("le", file);
13276 break;
13277 case LT:
13278 case LTU:
13279 fputs ("lt", file);
13280 break;
13281 case UNORDERED:
13282 fputs ("unord", file);
13283 break;
13284 case ORDERED:
13285 fputs ("ord", file);
13286 break;
13287 case UNEQ:
13288 fputs ("ueq", file);
13289 break;
13290 case UNGE:
13291 fputs ("nlt", file);
13292 break;
13293 case UNGT:
13294 fputs ("nle", file);
13295 break;
13296 case UNLE:
13297 fputs ("ule", file);
13298 break;
13299 case UNLT:
13300 fputs ("ult", file);
13301 break;
13302 case LTGT:
13303 fputs ("une", file);
13304 break;
13305 default:
13306 output_operand_lossage ("operand is not a condition code, "
13307 "invalid operand code 'Y'");
13308 return;
13309 }
13310 return;
13311
13312 case ';':
13313 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13314 putc (';', file);
13315 #endif
13316 return;
13317
13318 case '@':
13319 if (ASSEMBLER_DIALECT == ASM_ATT)
13320 putc ('%', file);
13321
13322 /* The kernel uses a different segment register for performance
13323 reasons; a system call would not have to trash the userspace
13324 segment register, which would be expensive. */
13325 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13326 fputs ("fs", file);
13327 else
13328 fputs ("gs", file);
13329 return;
13330
13331 default:
13332 output_operand_lossage ("invalid operand code '%c'", code);
13333 }
13334 }
13335
13336 if (REG_P (x))
13337 print_reg (x, code, file);
13338
13339 else if (MEM_P (x))
13340 {
13341 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13342 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13343 && GET_MODE (x) != BLKmode)
13344 {
13345 const char * size;
13346 switch (GET_MODE_SIZE (GET_MODE (x)))
13347 {
13348 case 1: size = "BYTE"; break;
13349 case 2: size = "WORD"; break;
13350 case 4: size = "DWORD"; break;
13351 case 8: size = "QWORD"; break;
13352 case 12: size = "TBYTE"; break;
13353 case 16:
13354 if (GET_MODE (x) == XFmode)
13355 size = "TBYTE";
13356 else
13357 size = "XMMWORD";
13358 break;
13359 case 32: size = "YMMWORD"; break;
13360 default:
13361 gcc_unreachable ();
13362 }
13363
13364 /* Check for explicit size override (codes 'b', 'w' and 'k') */
13365 if (code == 'b')
13366 size = "BYTE";
13367 else if (code == 'w')
13368 size = "WORD";
13369 else if (code == 'k')
13370 size = "DWORD";
13371
13372 fputs (size, file);
13373 fputs (" PTR ", file);
13374 }
13375
13376 x = XEXP (x, 0);
13377 /* Avoid (%rip) for call operands. */
13378 if (CONSTANT_ADDRESS_P (x) && code == 'P'
13379 && !CONST_INT_P (x))
13380 output_addr_const (file, x);
13381 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
13382 output_operand_lossage ("invalid constraints for operand");
13383 else
13384 output_address (x);
13385 }
13386
13387 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
13388 {
13389 REAL_VALUE_TYPE r;
13390 long l;
13391
13392 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13393 REAL_VALUE_TO_TARGET_SINGLE (r, l);
13394
13395 if (ASSEMBLER_DIALECT == ASM_ATT)
13396 putc ('$', file);
13397 /* Sign-extend the 32-bit SFmode immediate to 8 bytes. */
13398 if (code == 'q')
13399 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
13400 else
13401 fprintf (file, "0x%08x", (unsigned int) l);
13402 }
13403
13404 /* These float cases don't actually occur as immediate operands. */
13405 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
13406 {
13407 char dstr[30];
13408
13409 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13410 fputs (dstr, file);
13411 }
13412
13413 else if (GET_CODE (x) == CONST_DOUBLE
13414 && GET_MODE (x) == XFmode)
13415 {
13416 char dstr[30];
13417
13418 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13419 fputs (dstr, file);
13420 }
13421
13422 else
13423 {
13424 /* We have patterns that allow zero sets of memory, for instance.
13425 In 64-bit mode, we should probably support all 8-byte vectors,
13426 since we can in fact encode that into an immediate. */
13427 if (GET_CODE (x) == CONST_VECTOR)
13428 {
13429 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
13430 x = const0_rtx;
13431 }
13432
13433 if (code != 'P')
13434 {
13435 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
13436 {
13437 if (ASSEMBLER_DIALECT == ASM_ATT)
13438 putc ('$', file);
13439 }
13440 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13441 || GET_CODE (x) == LABEL_REF)
13442 {
13443 if (ASSEMBLER_DIALECT == ASM_ATT)
13444 putc ('$', file);
13445 else
13446 fputs ("OFFSET FLAT:", file);
13447 }
13448 }
13449 if (CONST_INT_P (x))
13450 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13451 else if (flag_pic)
13452 output_pic_addr_const (file, x, code);
13453 else
13454 output_addr_const (file, x);
13455 }
13456 }
13457
13458 static bool
13459 ix86_print_operand_punct_valid_p (unsigned char code)
13460 {
13461 return (code == '@' || code == '*' || code == '+'
13462 || code == '&' || code == ';');
13463 }
13464 \f
13465 /* Print a memory operand whose address is ADDR. */
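/* Illustrative note (a sketch, not part of the original sources): an address
   like (plus (reg:SI ebx) (const_int 4)) is printed as "4(%ebx)" in AT&T
   syntax and as "[ebx+4]" in Intel syntax; a scaled index such as
   (plus (reg ebx) (mult (reg ecx) (const_int 4))) becomes "(%ebx,%ecx,4)"
   or "[ebx+ecx*4]" respectively.  */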
13466
13467 static void
13468 ix86_print_operand_address (FILE *file, rtx addr)
13469 {
13470 struct ix86_address parts;
13471 rtx base, index, disp;
13472 int scale;
13473 int ok = ix86_decompose_address (addr, &parts);
13474
13475 gcc_assert (ok);
13476
13477 base = parts.base;
13478 index = parts.index;
13479 disp = parts.disp;
13480 scale = parts.scale;
13481
13482 switch (parts.seg)
13483 {
13484 case SEG_DEFAULT:
13485 break;
13486 case SEG_FS:
13487 case SEG_GS:
13488 if (ASSEMBLER_DIALECT == ASM_ATT)
13489 putc ('%', file);
13490 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
13491 break;
13492 default:
13493 gcc_unreachable ();
13494 }
13495
13496 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
13497 if (TARGET_64BIT && !base && !index)
13498 {
13499 rtx symbol = disp;
13500
13501 if (GET_CODE (disp) == CONST
13502 && GET_CODE (XEXP (disp, 0)) == PLUS
13503 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13504 symbol = XEXP (XEXP (disp, 0), 0);
13505
13506 if (GET_CODE (symbol) == LABEL_REF
13507 || (GET_CODE (symbol) == SYMBOL_REF
13508 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13509 base = pc_rtx;
13510 }
13511 if (!base && !index)
13512 {
13513 /* A displacement-only address requires special attention. */
13514
13515 if (CONST_INT_P (disp))
13516 {
13517 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
13518 fputs ("ds:", file);
13519 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13520 }
13521 else if (flag_pic)
13522 output_pic_addr_const (file, disp, 0);
13523 else
13524 output_addr_const (file, disp);
13525 }
13526 else
13527 {
13528 if (ASSEMBLER_DIALECT == ASM_ATT)
13529 {
13530 if (disp)
13531 {
13532 if (flag_pic)
13533 output_pic_addr_const (file, disp, 0);
13534 else if (GET_CODE (disp) == LABEL_REF)
13535 output_asm_label (disp);
13536 else
13537 output_addr_const (file, disp);
13538 }
13539
13540 putc ('(', file);
13541 if (base)
13542 print_reg (base, 0, file);
13543 if (index)
13544 {
13545 putc (',', file);
13546 print_reg (index, 0, file);
13547 if (scale != 1)
13548 fprintf (file, ",%d", scale);
13549 }
13550 putc (')', file);
13551 }
13552 else
13553 {
13554 rtx offset = NULL_RTX;
13555
13556 if (disp)
13557 {
13558 /* Pull out the offset of a symbol; print any symbol itself. */
13559 if (GET_CODE (disp) == CONST
13560 && GET_CODE (XEXP (disp, 0)) == PLUS
13561 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13562 {
13563 offset = XEXP (XEXP (disp, 0), 1);
13564 disp = gen_rtx_CONST (VOIDmode,
13565 XEXP (XEXP (disp, 0), 0));
13566 }
13567
13568 if (flag_pic)
13569 output_pic_addr_const (file, disp, 0);
13570 else if (GET_CODE (disp) == LABEL_REF)
13571 output_asm_label (disp);
13572 else if (CONST_INT_P (disp))
13573 offset = disp;
13574 else
13575 output_addr_const (file, disp);
13576 }
13577
13578 putc ('[', file);
13579 if (base)
13580 {
13581 print_reg (base, 0, file);
13582 if (offset)
13583 {
13584 if (INTVAL (offset) >= 0)
13585 putc ('+', file);
13586 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13587 }
13588 }
13589 else if (offset)
13590 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13591 else
13592 putc ('0', file);
13593
13594 if (index)
13595 {
13596 putc ('+', file);
13597 print_reg (index, 0, file);
13598 if (scale != 1)
13599 fprintf (file, "*%d", scale);
13600 }
13601 putc (']', file);
13602 }
13603 }
13604 }
13605
13606 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13607
13608 static bool
13609 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13610 {
13611 rtx op;
13612
13613 if (GET_CODE (x) != UNSPEC)
13614 return false;
13615
13616 op = XVECEXP (x, 0, 0);
13617 switch (XINT (x, 1))
13618 {
13619 case UNSPEC_GOTTPOFF:
13620 output_addr_const (file, op);
13621 /* FIXME: This might be @TPOFF in Sun ld. */
13622 fputs ("@gottpoff", file);
13623 break;
13624 case UNSPEC_TPOFF:
13625 output_addr_const (file, op);
13626 fputs ("@tpoff", file);
13627 break;
13628 case UNSPEC_NTPOFF:
13629 output_addr_const (file, op);
13630 if (TARGET_64BIT)
13631 fputs ("@tpoff", file);
13632 else
13633 fputs ("@ntpoff", file);
13634 break;
13635 case UNSPEC_DTPOFF:
13636 output_addr_const (file, op);
13637 fputs ("@dtpoff", file);
13638 break;
13639 case UNSPEC_GOTNTPOFF:
13640 output_addr_const (file, op);
13641 if (TARGET_64BIT)
13642 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13643 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13644 else
13645 fputs ("@gotntpoff", file);
13646 break;
13647 case UNSPEC_INDNTPOFF:
13648 output_addr_const (file, op);
13649 fputs ("@indntpoff", file);
13650 break;
13651 #if TARGET_MACHO
13652 case UNSPEC_MACHOPIC_OFFSET:
13653 output_addr_const (file, op);
13654 putc ('-', file);
13655 machopic_output_function_base_name (file);
13656 break;
13657 #endif
13658
13659 case UNSPEC_STACK_CHECK:
13660 {
13661 int offset;
13662
13663 gcc_assert (flag_split_stack);
13664
13665 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
13666 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
13667 #else
13668 gcc_unreachable ();
13669 #endif
13670
13671 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
13672 }
13673 break;
13674
13675 default:
13676 return false;
13677 }
13678
13679 return true;
13680 }
13681 \f
13682 /* Split one or more double-mode RTL references into pairs of half-mode
13683 references. The RTL can be REG, offsettable MEM, integer constant, or
13684 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
13685 split and "num" is its length. lo_half and hi_half are output arrays
13686 that parallel "operands". */
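/* Illustrative note (a sketch, not part of the original sources): splitting
   a DImode register yields its two SImode subregs (the low and high words),
   while splitting an offsettable DImode MEM yields the same MEM adjusted to
   SImode at byte offsets 0 and 4; a TImode operand splits into DImode halves
   at offsets 0 and 8 in the same way.  */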
13687
13688 void
13689 split_double_mode (enum machine_mode mode, rtx operands[],
13690 int num, rtx lo_half[], rtx hi_half[])
13691 {
13692 enum machine_mode half_mode;
13693 unsigned int byte;
13694
13695 switch (mode)
13696 {
13697 case TImode:
13698 half_mode = DImode;
13699 break;
13700 case DImode:
13701 half_mode = SImode;
13702 break;
13703 default:
13704 gcc_unreachable ();
13705 }
13706
13707 byte = GET_MODE_SIZE (half_mode);
13708
13709 while (num--)
13710 {
13711 rtx op = operands[num];
13712
13713 /* simplify_subreg refuses to split volatile memory references,
13714 but we still have to handle them. */
13715 if (MEM_P (op))
13716 {
13717 lo_half[num] = adjust_address (op, half_mode, 0);
13718 hi_half[num] = adjust_address (op, half_mode, byte);
13719 }
13720 else
13721 {
13722 lo_half[num] = simplify_gen_subreg (half_mode, op,
13723 GET_MODE (op) == VOIDmode
13724 ? mode : GET_MODE (op), 0);
13725 hi_half[num] = simplify_gen_subreg (half_mode, op,
13726 GET_MODE (op) == VOIDmode
13727 ? mode : GET_MODE (op), byte);
13728 }
13729 }
13730 }
13731 \f
13732 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13733 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13734 is the expression of the binary operation. The output may either be
13735 emitted here, or returned to the caller, like all output_* functions.
13736
13737 There is no guarantee that the operands are the same mode, as they
13738 might be within FLOAT or FLOAT_EXTEND expressions. */
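/* Illustrative note (a sketch, not part of the original sources): for an
   SFmode PLUS carried out in SSE registers this returns
   "addss\t{%2, %0|%0, %2}" (or the three-operand
   "vaddss\t{%2, %1, %0|%0, %1, %2}" when AVX is enabled), while the x87
   path picks an fadd/fsub/fmul/fdiv variant and operand order according to
   which operand matches the destination and whether it dies.  */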
13739
13740 #ifndef SYSV386_COMPAT
13741 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13742 wants to fix the assemblers because that causes incompatibility
13743 with gcc. No-one wants to fix gcc because that causes
13744 incompatibility with assemblers... You can use the option of
13745 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13746 #define SYSV386_COMPAT 1
13747 #endif
13748
13749 const char *
13750 output_387_binary_op (rtx insn, rtx *operands)
13751 {
13752 static char buf[40];
13753 const char *p;
13754 const char *ssep;
13755 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13756
13757 #ifdef ENABLE_CHECKING
13758 /* Even if we do not want to check the inputs, this documents input
13759 constraints. Which helps in understanding the following code. */
13760 if (STACK_REG_P (operands[0])
13761 && ((REG_P (operands[1])
13762 && REGNO (operands[0]) == REGNO (operands[1])
13763 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13764 || (REG_P (operands[2])
13765 && REGNO (operands[0]) == REGNO (operands[2])
13766 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13767 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13768 ; /* ok */
13769 else
13770 gcc_assert (is_sse);
13771 #endif
13772
13773 switch (GET_CODE (operands[3]))
13774 {
13775 case PLUS:
13776 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13777 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13778 p = "fiadd";
13779 else
13780 p = "fadd";
13781 ssep = "vadd";
13782 break;
13783
13784 case MINUS:
13785 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13786 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13787 p = "fisub";
13788 else
13789 p = "fsub";
13790 ssep = "vsub";
13791 break;
13792
13793 case MULT:
13794 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13795 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13796 p = "fimul";
13797 else
13798 p = "fmul";
13799 ssep = "vmul";
13800 break;
13801
13802 case DIV:
13803 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13804 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13805 p = "fidiv";
13806 else
13807 p = "fdiv";
13808 ssep = "vdiv";
13809 break;
13810
13811 default:
13812 gcc_unreachable ();
13813 }
13814
13815 if (is_sse)
13816 {
13817 if (TARGET_AVX)
13818 {
13819 strcpy (buf, ssep);
13820 if (GET_MODE (operands[0]) == SFmode)
13821 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13822 else
13823 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13824 }
13825 else
13826 {
13827 strcpy (buf, ssep + 1);
13828 if (GET_MODE (operands[0]) == SFmode)
13829 strcat (buf, "ss\t{%2, %0|%0, %2}");
13830 else
13831 strcat (buf, "sd\t{%2, %0|%0, %2}");
13832 }
13833 return buf;
13834 }
13835 strcpy (buf, p);
13836
13837 switch (GET_CODE (operands[3]))
13838 {
13839 case MULT:
13840 case PLUS:
13841 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13842 {
13843 rtx temp = operands[2];
13844 operands[2] = operands[1];
13845 operands[1] = temp;
13846 }
13847
13848 /* We know operands[0] == operands[1]. */
13849
13850 if (MEM_P (operands[2]))
13851 {
13852 p = "%Z2\t%2";
13853 break;
13854 }
13855
13856 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13857 {
13858 if (STACK_TOP_P (operands[0]))
13859 /* How is it that we are storing to a dead operand[2]?
13860 Well, presumably operands[1] is dead too. We can't
13861 store the result to st(0) as st(0) gets popped on this
13862 instruction. Instead store to operands[2] (which I
13863 think has to be st(1)). st(1) will be popped later.
13864 gcc <= 2.8.1 didn't have this check and generated
13865 assembly code that the Unixware assembler rejected. */
13866 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13867 else
13868 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13869 break;
13870 }
13871
13872 if (STACK_TOP_P (operands[0]))
13873 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13874 else
13875 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13876 break;
13877
13878 case MINUS:
13879 case DIV:
13880 if (MEM_P (operands[1]))
13881 {
13882 p = "r%Z1\t%1";
13883 break;
13884 }
13885
13886 if (MEM_P (operands[2]))
13887 {
13888 p = "%Z2\t%2";
13889 break;
13890 }
13891
13892 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13893 {
13894 #if SYSV386_COMPAT
13895 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13896 derived assemblers, confusingly reverse the direction of
13897 the operation for fsub{r} and fdiv{r} when the
13898 destination register is not st(0). The Intel assembler
13899 doesn't have this brain damage. Read !SYSV386_COMPAT to
13900 figure out what the hardware really does. */
13901 if (STACK_TOP_P (operands[0]))
13902 p = "{p\t%0, %2|rp\t%2, %0}";
13903 else
13904 p = "{rp\t%2, %0|p\t%0, %2}";
13905 #else
13906 if (STACK_TOP_P (operands[0]))
13907 /* As above for fmul/fadd, we can't store to st(0). */
13908 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13909 else
13910 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13911 #endif
13912 break;
13913 }
13914
13915 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13916 {
13917 #if SYSV386_COMPAT
13918 if (STACK_TOP_P (operands[0]))
13919 p = "{rp\t%0, %1|p\t%1, %0}";
13920 else
13921 p = "{p\t%1, %0|rp\t%0, %1}";
13922 #else
13923 if (STACK_TOP_P (operands[0]))
13924 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13925 else
13926 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13927 #endif
13928 break;
13929 }
13930
13931 if (STACK_TOP_P (operands[0]))
13932 {
13933 if (STACK_TOP_P (operands[1]))
13934 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13935 else
13936 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13937 break;
13938 }
13939 else if (STACK_TOP_P (operands[1]))
13940 {
13941 #if SYSV386_COMPAT
13942 p = "{\t%1, %0|r\t%0, %1}";
13943 #else
13944 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13945 #endif
13946 }
13947 else
13948 {
13949 #if SYSV386_COMPAT
13950 p = "{r\t%2, %0|\t%0, %2}";
13951 #else
13952 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13953 #endif
13954 }
13955 break;
13956
13957 default:
13958 gcc_unreachable ();
13959 }
13960
13961 strcat (buf, p);
13962 return buf;
13963 }
13964
13965 /* Return needed mode for entity in optimize_mode_switching pass. */
13966
13967 int
13968 ix86_mode_needed (int entity, rtx insn)
13969 {
13970 enum attr_i387_cw mode;
13971
13972 /* The mode UNINITIALIZED is used to store the control word after a
13973 function call or ASM pattern. The mode ANY specifies that the function
13974 has no requirements on the control word and makes no changes in the
13975 bits we are interested in. */
13976
13977 if (CALL_P (insn)
13978 || (NONJUMP_INSN_P (insn)
13979 && (asm_noperands (PATTERN (insn)) >= 0
13980 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13981 return I387_CW_UNINITIALIZED;
13982
13983 if (recog_memoized (insn) < 0)
13984 return I387_CW_ANY;
13985
13986 mode = get_attr_i387_cw (insn);
13987
13988 switch (entity)
13989 {
13990 case I387_TRUNC:
13991 if (mode == I387_CW_TRUNC)
13992 return mode;
13993 break;
13994
13995 case I387_FLOOR:
13996 if (mode == I387_CW_FLOOR)
13997 return mode;
13998 break;
13999
14000 case I387_CEIL:
14001 if (mode == I387_CW_CEIL)
14002 return mode;
14003 break;
14004
14005 case I387_MASK_PM:
14006 if (mode == I387_CW_MASK_PM)
14007 return mode;
14008 break;
14009
14010 default:
14011 gcc_unreachable ();
14012 }
14013
14014 return I387_CW_ANY;
14015 }
14016
14017 /* Output code to initialize the control word copies used by the trunc?f?i
14018 and rounding patterns. CURRENT_MODE is set to the current control word,
14019 while NEW_MODE is set to the new control word. */
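/* Illustrative note (a sketch, not part of the original sources): bits 10-11
   of the x87 control word select the rounding mode (00 = to nearest,
   01 = down, 10 = up, 11 = toward zero), which is why the code below ORs in
   0x0c00 for truncation, 0x0400 for floor and 0x0800 for ceil, and bit 5
   (0x0020) masks the precision exception for nearbyint.  */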
14020
14021 void
14022 emit_i387_cw_initialization (int mode)
14023 {
14024 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14025 rtx new_mode;
14026
14027 enum ix86_stack_slot slot;
14028
14029 rtx reg = gen_reg_rtx (HImode);
14030
14031 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14032 emit_move_insn (reg, copy_rtx (stored_mode));
14033
14034 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14035 || optimize_function_for_size_p (cfun))
14036 {
14037 switch (mode)
14038 {
14039 case I387_CW_TRUNC:
14040 /* round toward zero (truncate) */
14041 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14042 slot = SLOT_CW_TRUNC;
14043 break;
14044
14045 case I387_CW_FLOOR:
14046 /* round down toward -oo */
14047 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14048 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14049 slot = SLOT_CW_FLOOR;
14050 break;
14051
14052 case I387_CW_CEIL:
14053 /* round up toward +oo */
14054 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14055 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14056 slot = SLOT_CW_CEIL;
14057 break;
14058
14059 case I387_CW_MASK_PM:
14060 /* mask precision exception for nearbyint() */
14061 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14062 slot = SLOT_CW_MASK_PM;
14063 break;
14064
14065 default:
14066 gcc_unreachable ();
14067 }
14068 }
14069 else
14070 {
14071 switch (mode)
14072 {
14073 case I387_CW_TRUNC:
14074 /* round toward zero (truncate) */
14075 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14076 slot = SLOT_CW_TRUNC;
14077 break;
14078
14079 case I387_CW_FLOOR:
14080 /* round down toward -oo */
14081 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14082 slot = SLOT_CW_FLOOR;
14083 break;
14084
14085 case I387_CW_CEIL:
14086 /* round up toward +oo */
14087 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14088 slot = SLOT_CW_CEIL;
14089 break;
14090
14091 case I387_CW_MASK_PM:
14092 /* mask precision exception for nearbyint() */
14093 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14094 slot = SLOT_CW_MASK_PM;
14095 break;
14096
14097 default:
14098 gcc_unreachable ();
14099 }
14100 }
14101
14102 gcc_assert (slot < MAX_386_STACK_LOCALS);
14103
14104 new_mode = assign_386_stack_local (HImode, slot);
14105 emit_move_insn (new_mode, reg);
14106 }
14107
14108 /* Output code for INSN to convert a float to a signed int. OPERANDS
14109 are the insn operands. The output may be [HSD]Imode and the input
14110 operand may be [SDX]Fmode. */
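/* Illustrative note (a sketch, not part of the original sources): when the
   rounding mode has to be forced, the emitted sequence looks like

       fldcw   new_cw          # switch to the truncating control word
       fistpl  dest            # store and pop
       fldcw   old_cw          # restore the original control word

   whereas with the SSE3 fisttp instruction no control-word shuffling is
   needed.  */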
14111
14112 const char *
14113 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
14114 {
14115 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14116 int dimode_p = GET_MODE (operands[0]) == DImode;
14117 int round_mode = get_attr_i387_cw (insn);
14118
14119 /* Jump through a hoop or two for DImode, since the hardware has no
14120 non-popping instruction. We used to do this a different way, but
14121 that was somewhat fragile and broke with post-reload splitters. */
14122 if ((dimode_p || fisttp) && !stack_top_dies)
14123 output_asm_insn ("fld\t%y1", operands);
14124
14125 gcc_assert (STACK_TOP_P (operands[1]));
14126 gcc_assert (MEM_P (operands[0]));
14127 gcc_assert (GET_MODE (operands[1]) != TFmode);
14128
14129 if (fisttp)
14130 output_asm_insn ("fisttp%Z0\t%0", operands);
14131 else
14132 {
14133 if (round_mode != I387_CW_ANY)
14134 output_asm_insn ("fldcw\t%3", operands);
14135 if (stack_top_dies || dimode_p)
14136 output_asm_insn ("fistp%Z0\t%0", operands);
14137 else
14138 output_asm_insn ("fist%Z0\t%0", operands);
14139 if (round_mode != I387_CW_ANY)
14140 output_asm_insn ("fldcw\t%2", operands);
14141 }
14142
14143 return "";
14144 }
14145
14146 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14147 have the values zero or one, indicates the ffreep insn's operand
14148 from the OPERANDS array. */
14149
14150 static const char *
14151 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14152 {
14153 if (TARGET_USE_FFREEP)
14154 #ifdef HAVE_AS_IX86_FFREEP
14155 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14156 #else
14157 {
14158 static char retval[32];
14159 int regno = REGNO (operands[opno]);
14160
14161 gcc_assert (FP_REGNO_P (regno));
14162
14163 regno -= FIRST_STACK_REG;
14164
14165 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14166 return retval;
14167 }
14168 #endif
14169
14170 return opno ? "fstp\t%y1" : "fstp\t%y0";
14171 }
14172
14173
14174 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14175 should be used. UNORDERED_P is true when fucom should be used. */
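/* Illustrative note (a sketch, not part of the original sources): for SSE
   operands this returns "ucomiss"/"comiss" for SFmode and
   "ucomisd"/"comisd" for DFmode (with a "v" prefix under AVX); for x87
   operands it picks among the ftst, fcom/fucom and fcomi variants depending
   on whether the stack top dies and whether EFLAGS output is wanted.  */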
14176
14177 const char *
14178 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
14179 {
14180 int stack_top_dies;
14181 rtx cmp_op0, cmp_op1;
14182 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14183
14184 if (eflags_p)
14185 {
14186 cmp_op0 = operands[0];
14187 cmp_op1 = operands[1];
14188 }
14189 else
14190 {
14191 cmp_op0 = operands[1];
14192 cmp_op1 = operands[2];
14193 }
14194
14195 if (is_sse)
14196 {
14197 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14198 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14199 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14200 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14201
14202 if (GET_MODE (operands[0]) == SFmode)
14203 if (unordered_p)
14204 return &ucomiss[TARGET_AVX ? 0 : 1];
14205 else
14206 return &comiss[TARGET_AVX ? 0 : 1];
14207 else
14208 if (unordered_p)
14209 return &ucomisd[TARGET_AVX ? 0 : 1];
14210 else
14211 return &comisd[TARGET_AVX ? 0 : 1];
14212 }
14213
14214 gcc_assert (STACK_TOP_P (cmp_op0));
14215
14216 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14217
14218 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14219 {
14220 if (stack_top_dies)
14221 {
14222 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14223 return output_387_ffreep (operands, 1);
14224 }
14225 else
14226 return "ftst\n\tfnstsw\t%0";
14227 }
14228
14229 if (STACK_REG_P (cmp_op1)
14230 && stack_top_dies
14231 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14232 && REGNO (cmp_op1) != FIRST_STACK_REG)
14233 {
14234 /* If both the top of the 387 stack and the other operand (also a
14235 stack register) die, then this must be an `fcompp' float
14236 compare. */
14237
14238 if (eflags_p)
14239 {
14240 /* There is no double popping fcomi variant. Fortunately,
14241 eflags is immune from the fstp's cc clobbering. */
14242 if (unordered_p)
14243 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14244 else
14245 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14246 return output_387_ffreep (operands, 0);
14247 }
14248 else
14249 {
14250 if (unordered_p)
14251 return "fucompp\n\tfnstsw\t%0";
14252 else
14253 return "fcompp\n\tfnstsw\t%0";
14254 }
14255 }
14256 else
14257 {
14258 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14259
14260 static const char * const alt[16] =
14261 {
14262 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14263 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14264 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14265 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14266
14267 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14268 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14269 NULL,
14270 NULL,
14271
14272 "fcomi\t{%y1, %0|%0, %y1}",
14273 "fcomip\t{%y1, %0|%0, %y1}",
14274 "fucomi\t{%y1, %0|%0, %y1}",
14275 "fucomip\t{%y1, %0|%0, %y1}",
14276
14277 NULL,
14278 NULL,
14279 NULL,
14280 NULL
14281 };
14282
14283 int mask;
14284 const char *ret;
14285
14286 mask = eflags_p << 3;
14287 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14288 mask |= unordered_p << 1;
14289 mask |= stack_top_dies;
14290
14291 gcc_assert (mask < 16);
14292 ret = alt[mask];
14293 gcc_assert (ret);
14294
14295 return ret;
14296 }
14297 }
14298
14299 void
14300 ix86_output_addr_vec_elt (FILE *file, int value)
14301 {
14302 const char *directive = ASM_LONG;
14303
14304 #ifdef ASM_QUAD
14305 if (TARGET_64BIT)
14306 directive = ASM_QUAD;
14307 #else
14308 gcc_assert (!TARGET_64BIT);
14309 #endif
14310
14311 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14312 }
14313
14314 void
14315 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14316 {
14317 const char *directive = ASM_LONG;
14318
14319 #ifdef ASM_QUAD
14320 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14321 directive = ASM_QUAD;
14322 #else
14323 gcc_assert (!TARGET_64BIT);
14324 #endif
14325 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14326 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14327 fprintf (file, "%s%s%d-%s%d\n",
14328 directive, LPREFIX, value, LPREFIX, rel);
14329 else if (HAVE_AS_GOTOFF_IN_DATA)
14330 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14331 #if TARGET_MACHO
14332 else if (TARGET_MACHO)
14333 {
14334 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14335 machopic_output_function_base_name (file);
14336 putc ('\n', file);
14337 }
14338 #endif
14339 else
14340 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14341 GOT_SYMBOL_NAME, LPREFIX, value);
14342 }
14343 \f
14344 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
14345 for the target. */
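/* Illustrative note (a sketch, not part of the original sources): clearing
   %eax normally becomes "xorl %eax, %eax" (shorter, but it clobbers the
   flags, hence the CLOBBER added below), falling back to "movl $0, %eax"
   for targets where the plain move is preferable.  */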
14346
14347 void
14348 ix86_expand_clear (rtx dest)
14349 {
14350 rtx tmp;
14351
14352 /* We play register width games, which are only valid after reload. */
14353 gcc_assert (reload_completed);
14354
14355 /* Avoid HImode and its attendant prefix byte. */
14356 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
14357 dest = gen_rtx_REG (SImode, REGNO (dest));
14358 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
14359
14360 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
14361 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
14362 {
14363 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14364 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
14365 }
14366
14367 emit_insn (tmp);
14368 }
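
/* Example of the effect (a sketch): clearing a QImode or HImode hard
   register is widened to an SImode clear of the containing register,
   and once the flags clobber is attached the movsi_xor pattern can
   emit the 2-byte "xorl %eax, %eax" instead of the 5-byte
   "movl $0, %eax".  */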
14369
14370 /* X is an unchanging MEM. If it is a constant pool reference, return
14371 the constant pool rtx, else NULL. */
14372
14373 rtx
14374 maybe_get_pool_constant (rtx x)
14375 {
14376 x = ix86_delegitimize_address (XEXP (x, 0));
14377
14378 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
14379 return get_pool_constant (x);
14380
14381 return NULL_RTX;
14382 }
14383
14384 void
14385 ix86_expand_move (enum machine_mode mode, rtx operands[])
14386 {
14387 rtx op0, op1;
14388 enum tls_model model;
14389
14390 op0 = operands[0];
14391 op1 = operands[1];
14392
14393 if (GET_CODE (op1) == SYMBOL_REF)
14394 {
14395 model = SYMBOL_REF_TLS_MODEL (op1);
14396 if (model)
14397 {
14398 op1 = legitimize_tls_address (op1, model, true);
14399 op1 = force_operand (op1, op0);
14400 if (op1 == op0)
14401 return;
14402 }
14403 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14404 && SYMBOL_REF_DLLIMPORT_P (op1))
14405 op1 = legitimize_dllimport_symbol (op1, false);
14406 }
14407 else if (GET_CODE (op1) == CONST
14408 && GET_CODE (XEXP (op1, 0)) == PLUS
14409 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
14410 {
14411 rtx addend = XEXP (XEXP (op1, 0), 1);
14412 rtx symbol = XEXP (XEXP (op1, 0), 0);
14413 rtx tmp = NULL;
14414
14415 model = SYMBOL_REF_TLS_MODEL (symbol);
14416 if (model)
14417 tmp = legitimize_tls_address (symbol, model, true);
14418 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14419 && SYMBOL_REF_DLLIMPORT_P (symbol))
14420 tmp = legitimize_dllimport_symbol (symbol, true);
14421
14422 if (tmp)
14423 {
14424 tmp = force_operand (tmp, NULL);
14425 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
14426 op0, 1, OPTAB_DIRECT);
14427 if (tmp == op0)
14428 return;
14429 }
14430 }
14431
14432 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
14433 {
14434 if (TARGET_MACHO && !TARGET_64BIT)
14435 {
14436 #if TARGET_MACHO
14437 if (MACHOPIC_PURE)
14438 {
14439 rtx temp = ((reload_in_progress
14440 || ((op0 && REG_P (op0))
14441 && mode == Pmode))
14442 ? op0 : gen_reg_rtx (Pmode));
14443 op1 = machopic_indirect_data_reference (op1, temp);
14444 op1 = machopic_legitimize_pic_address (op1, mode,
14445 temp == op1 ? 0 : temp);
14446 }
14447 else if (MACHOPIC_INDIRECT)
14448 op1 = machopic_indirect_data_reference (op1, 0);
14449 if (op0 == op1)
14450 return;
14451 #endif
14452 }
14453 else
14454 {
14455 if (MEM_P (op0))
14456 op1 = force_reg (Pmode, op1);
14457 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
14458 {
14459 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
14460 op1 = legitimize_pic_address (op1, reg);
14461 if (op0 == op1)
14462 return;
14463 }
14464 }
14465 }
14466 else
14467 {
14468 if (MEM_P (op0)
14469 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
14470 || !push_operand (op0, mode))
14471 && MEM_P (op1))
14472 op1 = force_reg (mode, op1);
14473
14474 if (push_operand (op0, mode)
14475 && ! general_no_elim_operand (op1, mode))
14476 op1 = copy_to_mode_reg (mode, op1);
14477
14478 /* Force large constants in 64-bit compilation into a register
14479 so that they can be CSEed. */
14480 if (can_create_pseudo_p ()
14481 && (mode == DImode) && TARGET_64BIT
14482 && immediate_operand (op1, mode)
14483 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14484 && !register_operand (op0, mode)
14485 && optimize)
14486 op1 = copy_to_mode_reg (mode, op1);
14487
14488 if (can_create_pseudo_p ()
14489 && FLOAT_MODE_P (mode)
14490 && GET_CODE (op1) == CONST_DOUBLE)
14491 {
14492 /* If we are loading a floating point constant into a register,
14493 force the value to memory now, since we'll get better code
14494 out of the back end. */
14495
14496 op1 = validize_mem (force_const_mem (mode, op1));
14497 if (!register_operand (op0, mode))
14498 {
14499 rtx temp = gen_reg_rtx (mode);
14500 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
14501 emit_move_insn (op0, temp);
14502 return;
14503 }
14504 }
14505 }
14506
14507 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14508 }
14509
14510 void
14511 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
14512 {
14513 rtx op0 = operands[0], op1 = operands[1];
14514 unsigned int align = GET_MODE_ALIGNMENT (mode);
14515
14516 /* Force constants other than zero into memory. We do not know how
14517 the instructions used to build constants modify the upper 64 bits
14518 of the register; once we have that information we may be able
14519 to handle some of them more efficiently. */
14520 if (can_create_pseudo_p ()
14521 && register_operand (op0, mode)
14522 && (CONSTANT_P (op1)
14523 || (GET_CODE (op1) == SUBREG
14524 && CONSTANT_P (SUBREG_REG (op1))))
14525 && !standard_sse_constant_p (op1))
14526 op1 = validize_mem (force_const_mem (mode, op1));
14527
14528 /* We need to check memory alignment for SSE mode since attributes
14529 can make operands unaligned. */
14530 if (can_create_pseudo_p ()
14531 && SSE_REG_MODE_P (mode)
14532 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
14533 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
14534 {
14535 rtx tmp[2];
14536
14537 /* ix86_expand_vector_move_misalign() does not like constants ... */
14538 if (CONSTANT_P (op1)
14539 || (GET_CODE (op1) == SUBREG
14540 && CONSTANT_P (SUBREG_REG (op1))))
14541 op1 = validize_mem (force_const_mem (mode, op1));
14542
14543 /* ... nor both arguments in memory. */
14544 if (!register_operand (op0, mode)
14545 && !register_operand (op1, mode))
14546 op1 = force_reg (mode, op1);
14547
14548 tmp[0] = op0; tmp[1] = op1;
14549 ix86_expand_vector_move_misalign (mode, tmp);
14550 return;
14551 }
14552
14553 /* Make operand1 a register if it isn't already. */
14554 if (can_create_pseudo_p ()
14555 && !register_operand (op0, mode)
14556 && !register_operand (op1, mode))
14557 {
14558 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
14559 return;
14560 }
14561
14562 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14563 }
14564
14565 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
14566 straight to ix86_expand_vector_move. */
14567 /* Code generation for scalar reg-reg moves of single and double precision data:
14568 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
14569 movaps reg, reg
14570 else
14571 movss reg, reg
14572 if (x86_sse_partial_reg_dependency == true)
14573 movapd reg, reg
14574 else
14575 movsd reg, reg
14576
14577 Code generation for scalar loads of double precision data:
14578 if (x86_sse_split_regs == true)
14579 movlpd mem, reg (gas syntax)
14580 else
14581 movsd mem, reg
14582
14583 Code generation for unaligned packed loads of single precision data
14584 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
14585 if (x86_sse_unaligned_move_optimal)
14586 movups mem, reg
14587
14588 if (x86_sse_partial_reg_dependency == true)
14589 {
14590 xorps reg, reg
14591 movlps mem, reg
14592 movhps mem+8, reg
14593 }
14594 else
14595 {
14596 movlps mem, reg
14597 movhps mem+8, reg
14598 }
14599
14600 Code generation for unaligned packed loads of double precision data
14601 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
14602 if (x86_sse_unaligned_move_optimal)
14603 movupd mem, reg
14604
14605 if (x86_sse_split_regs == true)
14606 {
14607 movlpd mem, reg
14608 movhpd mem+8, reg
14609 }
14610 else
14611 {
14612 movsd mem, reg
14613 movhpd mem+8, reg
14614 }
14615 */
14616
14617 void
14618 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
14619 {
14620 rtx op0, op1, m;
14621
14622 op0 = operands[0];
14623 op1 = operands[1];
14624
14625 if (TARGET_AVX)
14626 {
14627 switch (GET_MODE_CLASS (mode))
14628 {
14629 case MODE_VECTOR_INT:
14630 case MODE_INT:
14631 switch (GET_MODE_SIZE (mode))
14632 {
14633 case 16:
14634 /* If we're optimizing for size, movups is the smallest. */
14635 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14636 {
14637 op0 = gen_lowpart (V4SFmode, op0);
14638 op1 = gen_lowpart (V4SFmode, op1);
14639 emit_insn (gen_avx_movups (op0, op1));
14640 return;
14641 }
14642 op0 = gen_lowpart (V16QImode, op0);
14643 op1 = gen_lowpart (V16QImode, op1);
14644 emit_insn (gen_avx_movdqu (op0, op1));
14645 break;
14646 case 32:
14647 op0 = gen_lowpart (V32QImode, op0);
14648 op1 = gen_lowpart (V32QImode, op1);
14649 emit_insn (gen_avx_movdqu256 (op0, op1));
14650 break;
14651 default:
14652 gcc_unreachable ();
14653 }
14654 break;
14655 case MODE_VECTOR_FLOAT:
14656 op0 = gen_lowpart (mode, op0);
14657 op1 = gen_lowpart (mode, op1);
14658
14659 switch (mode)
14660 {
14661 case V4SFmode:
14662 emit_insn (gen_avx_movups (op0, op1));
14663 break;
14664 case V8SFmode:
14665 emit_insn (gen_avx_movups256 (op0, op1));
14666 break;
14667 case V2DFmode:
14668 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14669 {
14670 op0 = gen_lowpart (V4SFmode, op0);
14671 op1 = gen_lowpart (V4SFmode, op1);
14672 emit_insn (gen_avx_movups (op0, op1));
14673 return;
14674 }
14675 emit_insn (gen_avx_movupd (op0, op1));
14676 break;
14677 case V4DFmode:
14678 emit_insn (gen_avx_movupd256 (op0, op1));
14679 break;
14680 default:
14681 gcc_unreachable ();
14682 }
14683 break;
14684
14685 default:
14686 gcc_unreachable ();
14687 }
14688
14689 return;
14690 }
14691
14692 if (MEM_P (op1))
14693 {
14694 /* If we're optimizing for size, movups is the smallest. */
14695 if (optimize_insn_for_size_p ()
14696 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14697 {
14698 op0 = gen_lowpart (V4SFmode, op0);
14699 op1 = gen_lowpart (V4SFmode, op1);
14700 emit_insn (gen_sse_movups (op0, op1));
14701 return;
14702 }
14703
14704 /* ??? If we have typed data, then it would appear that using
14705 movdqu is the only way to get unaligned data loaded with
14706 integer type. */
14707 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14708 {
14709 op0 = gen_lowpart (V16QImode, op0);
14710 op1 = gen_lowpart (V16QImode, op1);
14711 emit_insn (gen_sse2_movdqu (op0, op1));
14712 return;
14713 }
14714
14715 if (TARGET_SSE2 && mode == V2DFmode)
14716 {
14717 rtx zero;
14718
14719 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14720 {
14721 op0 = gen_lowpart (V2DFmode, op0);
14722 op1 = gen_lowpart (V2DFmode, op1);
14723 emit_insn (gen_sse2_movupd (op0, op1));
14724 return;
14725 }
14726
14727 /* When SSE registers are split into halves, we can avoid
14728 writing to the top half twice. */
14729 if (TARGET_SSE_SPLIT_REGS)
14730 {
14731 emit_clobber (op0);
14732 zero = op0;
14733 }
14734 else
14735 {
14736 /* ??? Not sure about the best option for the Intel chips.
14737 The following would seem to satisfy; the register is
14738 entirely cleared, breaking the dependency chain. We
14739 then store to the upper half, with a dependency depth
14740 of one. A rumor has it that Intel recommends two movsd
14741 followed by an unpacklpd, but this is unconfirmed. And
14742 given that the dependency depth of the unpacklpd would
14743 still be one, I'm not sure why this would be better. */
14744 zero = CONST0_RTX (V2DFmode);
14745 }
14746
14747 m = adjust_address (op1, DFmode, 0);
14748 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14749 m = adjust_address (op1, DFmode, 8);
14750 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14751 }
14752 else
14753 {
14754 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14755 {
14756 op0 = gen_lowpart (V4SFmode, op0);
14757 op1 = gen_lowpart (V4SFmode, op1);
14758 emit_insn (gen_sse_movups (op0, op1));
14759 return;
14760 }
14761
14762 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14763 emit_move_insn (op0, CONST0_RTX (mode));
14764 else
14765 emit_clobber (op0);
14766
14767 if (mode != V4SFmode)
14768 op0 = gen_lowpart (V4SFmode, op0);
14769 m = adjust_address (op1, V2SFmode, 0);
14770 emit_insn (gen_sse_loadlps (op0, op0, m));
14771 m = adjust_address (op1, V2SFmode, 8);
14772 emit_insn (gen_sse_loadhps (op0, op0, m));
14773 }
14774 }
14775 else if (MEM_P (op0))
14776 {
14777 /* If we're optimizing for size, movups is the smallest. */
14778 if (optimize_insn_for_size_p ()
14779 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14780 {
14781 op0 = gen_lowpart (V4SFmode, op0);
14782 op1 = gen_lowpart (V4SFmode, op1);
14783 emit_insn (gen_sse_movups (op0, op1));
14784 return;
14785 }
14786
14787 /* ??? Similar to above, only less clear because of
14788 "typeless" stores. */
14789 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14790 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14791 {
14792 op0 = gen_lowpart (V16QImode, op0);
14793 op1 = gen_lowpart (V16QImode, op1);
14794 emit_insn (gen_sse2_movdqu (op0, op1));
14795 return;
14796 }
14797
14798 if (TARGET_SSE2 && mode == V2DFmode)
14799 {
14800 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14801 {
14802 op0 = gen_lowpart (V2DFmode, op0);
14803 op1 = gen_lowpart (V2DFmode, op1);
14804 emit_insn (gen_sse2_movupd (op0, op1));
14805 }
14806 else
14807 {
14808 m = adjust_address (op0, DFmode, 0);
14809 emit_insn (gen_sse2_storelpd (m, op1));
14810 m = adjust_address (op0, DFmode, 8);
14811 emit_insn (gen_sse2_storehpd (m, op1));
14812 }
14813 }
14814 else
14815 {
14816 if (mode != V4SFmode)
14817 op1 = gen_lowpart (V4SFmode, op1);
14818
14819 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14820 {
14821 op0 = gen_lowpart (V4SFmode, op0);
14822 emit_insn (gen_sse_movups (op0, op1));
14823 }
14824 else
14825 {
14826 m = adjust_address (op0, V2SFmode, 0);
14827 emit_insn (gen_sse_storelps (m, op1));
14828 m = adjust_address (op0, V2SFmode, 8);
14829 emit_insn (gen_sse_storehps (m, op1));
14830 }
14831 }
14832 }
14833 else
14834 gcc_unreachable ();
14835 }
14836
14837 /* Expand a push in MODE. This is some mode for which we do not support
14838 proper push instructions, at least from the registers that we expect
14839 the value to live in. */
14840
14841 void
14842 ix86_expand_push (enum machine_mode mode, rtx x)
14843 {
14844 rtx tmp;
14845
14846 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14847 GEN_INT (-GET_MODE_SIZE (mode)),
14848 stack_pointer_rtx, 1, OPTAB_DIRECT);
14849 if (tmp != stack_pointer_rtx)
14850 emit_move_insn (stack_pointer_rtx, tmp);
14851
14852 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
14853
14854 /* When we push an operand onto the stack, it has to be aligned at
14855 least at the function argument boundary. However, since we don't
14856 have the argument type, we can't determine the actual argument
14857 boundary. */
14858 emit_move_insn (tmp, x);
14859 }
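
/* A minimal sketch of the expansion above for a 16-byte mode in
   32-bit code (instruction choice depends on the mode and register
   class; this is only illustrative):

       subl   $16, %esp
       movups %xmm0, (%esp)

   i.e. the stack pointer is adjusted explicitly and the value is
   stored with an ordinary move, since no push instruction exists for
   such operands.  */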
14860
14861 /* Helper function of ix86_fixup_binary_operands to canonicalize
14862 operand order. Returns true if the operands should be swapped. */
14863
14864 static bool
14865 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14866 rtx operands[])
14867 {
14868 rtx dst = operands[0];
14869 rtx src1 = operands[1];
14870 rtx src2 = operands[2];
14871
14872 /* If the operation is not commutative, we can't do anything. */
14873 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14874 return false;
14875
14876 /* Highest priority is that src1 should match dst. */
14877 if (rtx_equal_p (dst, src1))
14878 return false;
14879 if (rtx_equal_p (dst, src2))
14880 return true;
14881
14882 /* Next highest priority is that immediate constants come second. */
14883 if (immediate_operand (src2, mode))
14884 return false;
14885 if (immediate_operand (src1, mode))
14886 return true;
14887
14888 /* Lowest priority is that memory references should come second. */
14889 if (MEM_P (src2))
14890 return false;
14891 if (MEM_P (src1))
14892 return true;
14893
14894 return false;
14895 }
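
/* Hypothetical example of the canonicalization above: for a
   commutative PLUS with dst = %eax, src1 = (const_int 5) and
   src2 = %eax, the operands are swapped so that src1 matches dst,
   which allows the two-address form "addl $5, %eax" without an
   extra copy.  */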
14896
14897
14898 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14899 destination to use for the operation. If different from the true
14900 destination in operands[0], a copy operation will be required. */
14901
14902 rtx
14903 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14904 rtx operands[])
14905 {
14906 rtx dst = operands[0];
14907 rtx src1 = operands[1];
14908 rtx src2 = operands[2];
14909
14910 /* Canonicalize operand order. */
14911 if (ix86_swap_binary_operands_p (code, mode, operands))
14912 {
14913 rtx temp;
14914
14915 /* It is invalid to swap operands of different modes. */
14916 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14917
14918 temp = src1;
14919 src1 = src2;
14920 src2 = temp;
14921 }
14922
14923 /* Both source operands cannot be in memory. */
14924 if (MEM_P (src1) && MEM_P (src2))
14925 {
14926 /* Optimization: Only read from memory once. */
14927 if (rtx_equal_p (src1, src2))
14928 {
14929 src2 = force_reg (mode, src2);
14930 src1 = src2;
14931 }
14932 else
14933 src2 = force_reg (mode, src2);
14934 }
14935
14936 /* If the destination is memory, and we do not have matching source
14937 operands, do things in registers. */
14938 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14939 dst = gen_reg_rtx (mode);
14940
14941 /* Source 1 cannot be a constant. */
14942 if (CONSTANT_P (src1))
14943 src1 = force_reg (mode, src1);
14944
14945 /* Source 1 cannot be a non-matching memory. */
14946 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14947 src1 = force_reg (mode, src1);
14948
14949 operands[1] = src1;
14950 operands[2] = src2;
14951 return dst;
14952 }
14953
14954 /* Similarly, but assume that the destination has already been
14955 set up properly. */
14956
14957 void
14958 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14959 enum machine_mode mode, rtx operands[])
14960 {
14961 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14962 gcc_assert (dst == operands[0]);
14963 }
14964
14965 /* Attempt to expand a binary operator. Make the expansion closer to the
14966 actual machine than just general_operand, which would allow 3 separate
14967 memory references (one output, two input) in a single insn. */
14968
14969 void
14970 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14971 rtx operands[])
14972 {
14973 rtx src1, src2, dst, op, clob;
14974
14975 dst = ix86_fixup_binary_operands (code, mode, operands);
14976 src1 = operands[1];
14977 src2 = operands[2];
14978
14979 /* Emit the instruction. */
14980
14981 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14982 if (reload_in_progress)
14983 {
14984 /* Reload doesn't know about the flags register, and doesn't know that
14985 it doesn't want to clobber it. We can only do this with PLUS. */
14986 gcc_assert (code == PLUS);
14987 emit_insn (op);
14988 }
14989 else
14990 {
14991 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14992 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14993 }
14994
14995 /* Fix up the destination if needed. */
14996 if (dst != operands[0])
14997 emit_move_insn (operands[0], dst);
14998 }
14999
15000 /* Return TRUE or FALSE depending on whether the binary operator meets the
15001 appropriate constraints. */
15002
15003 bool
15004 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15005 rtx operands[3])
15006 {
15007 rtx dst = operands[0];
15008 rtx src1 = operands[1];
15009 rtx src2 = operands[2];
15010
15011 /* Both source operands cannot be in memory. */
15012 if (MEM_P (src1) && MEM_P (src2))
15013 return false;
15014
15015 /* Canonicalize operand order for commutative operators. */
15016 if (ix86_swap_binary_operands_p (code, mode, operands))
15017 {
15018 rtx temp = src1;
15019 src1 = src2;
15020 src2 = temp;
15021 }
15022
15023 /* If the destination is memory, we must have a matching source operand. */
15024 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15025 return false;
15026
15027 /* Source 1 cannot be a constant. */
15028 if (CONSTANT_P (src1))
15029 return false;
15030
15031 /* Source 1 cannot be a non-matching memory. */
15032 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15033 {
15034 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15035 return (code == AND
15036 && (mode == HImode
15037 || mode == SImode
15038 || (TARGET_64BIT && mode == DImode))
15039 && CONST_INT_P (src2)
15040 && (INTVAL (src2) == 0xff
15041 || INTVAL (src2) == 0xffff));
15042 }
15043
15044 return true;
15045 }
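
/* Illustration of the final special case above (a sketch): an AND of
   a non-matching memory operand with 0xff, e.g.
   (set (reg:SI 0) (and:SI (mem:SI ...) (const_int 255))), is still
   accepted because it can be implemented as the zero-extending load
   "movzbl mem, %eax" rather than a read-modify-write AND.  */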
15046
15047 /* Attempt to expand a unary operator. Make the expansion closer to the
15048 actual machine than just general_operand, which would allow 2 separate
15049 memory references (one output, one input) in a single insn. */
15050
15051 void
15052 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15053 rtx operands[])
15054 {
15055 int matching_memory;
15056 rtx src, dst, op, clob;
15057
15058 dst = operands[0];
15059 src = operands[1];
15060
15061 /* If the destination is memory, and we do not have matching source
15062 operands, do things in registers. */
15063 matching_memory = 0;
15064 if (MEM_P (dst))
15065 {
15066 if (rtx_equal_p (dst, src))
15067 matching_memory = 1;
15068 else
15069 dst = gen_reg_rtx (mode);
15070 }
15071
15072 /* When the source operand is memory, the destination must match. */
15073 if (MEM_P (src) && !matching_memory)
15074 src = force_reg (mode, src);
15075
15076 /* Emit the instruction. */
15077
15078 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15079 if (reload_in_progress || code == NOT)
15080 {
15081 /* Reload doesn't know about the flags register, and doesn't know that
15082 it doesn't want to clobber it. */
15083 gcc_assert (code == NOT);
15084 emit_insn (op);
15085 }
15086 else
15087 {
15088 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15089 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15090 }
15091
15092 /* Fix up the destination if needed. */
15093 if (dst != operands[0])
15094 emit_move_insn (operands[0], dst);
15095 }
15096
15097 /* Split 32-bit/64-bit divmod with 8-bit unsigned divmod if the dividend
15098 and divisor are within the range [0-255]. */
15099
15100 void
15101 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15102 bool signed_p)
15103 {
15104 rtx end_label, qimode_label;
15105 rtx insn, div, mod;
15106 rtx scratch, tmp0, tmp1, tmp2;
15107 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15108 rtx (*gen_zero_extend) (rtx, rtx);
15109 rtx (*gen_test_ccno_1) (rtx, rtx);
15110
15111 switch (mode)
15112 {
15113 case SImode:
15114 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15115 gen_test_ccno_1 = gen_testsi_ccno_1;
15116 gen_zero_extend = gen_zero_extendqisi2;
15117 break;
15118 case DImode:
15119 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15120 gen_test_ccno_1 = gen_testdi_ccno_1;
15121 gen_zero_extend = gen_zero_extendqidi2;
15122 break;
15123 default:
15124 gcc_unreachable ();
15125 }
15126
15127 end_label = gen_label_rtx ();
15128 qimode_label = gen_label_rtx ();
15129
15130 scratch = gen_reg_rtx (mode);
15131
15132 /* Use 8-bit unsigned divmod if the dividend and divisor are within
15133 the range [0-255]. */
15134 emit_move_insn (scratch, operands[2]);
15135 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15136 scratch, 1, OPTAB_DIRECT);
15137 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15138 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15139 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15140 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15141 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15142 pc_rtx);
15143 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15144 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15145 JUMP_LABEL (insn) = qimode_label;
15146
15147 /* Generate the original signed/unsigned divmod. */
15148 div = gen_divmod4_1 (operands[0], operands[1],
15149 operands[2], operands[3]);
15150 emit_insn (div);
15151
15152 /* Branch to the end. */
15153 emit_jump_insn (gen_jump (end_label));
15154 emit_barrier ();
15155
15156 /* Generate 8bit unsigned divide. */
15157 emit_label (qimode_label);
15158 /* Don't use operands[0] for the result of the 8-bit divide since not
15159 all registers support QImode ZERO_EXTRACT. */
15160 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15161 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15162 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15163 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15164
15165 if (signed_p)
15166 {
15167 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15168 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15169 }
15170 else
15171 {
15172 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15173 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15174 }
15175
15176 /* Extract remainder from AH. */
15177 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15178 if (REG_P (operands[1]))
15179 insn = emit_move_insn (operands[1], tmp1);
15180 else
15181 {
15182 /* Need a new scratch register since the old one holds the result
15183 of the 8-bit divide. */
15184 scratch = gen_reg_rtx (mode);
15185 emit_move_insn (scratch, tmp1);
15186 insn = emit_move_insn (operands[1], scratch);
15187 }
15188 set_unique_reg_note (insn, REG_EQUAL, mod);
15189
15190 /* Zero extend quotient from AL. */
15191 tmp1 = gen_lowpart (QImode, tmp0);
15192 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15193 set_unique_reg_note (insn, REG_EQUAL, div);
15194
15195 emit_label (end_label);
15196 }
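
/* A sketch of the code produced by the splitter above for an
   unsigned SImode division (register names and labels are
   illustrative only):

       movl    dividend, %ecx     ; scratch = dividend
       orl     divisor, %ecx      ; scratch |= divisor
       testl   $-256, %ecx        ; any bits above bit 7 set?
       je      .Lqimode
       divl    ...                ; full 32-bit divide
       jmp     .Lend
   .Lqimode:
       divb    ...                ; 8-bit divide: AL = quotient, AH = remainder
   .Lend:
*/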
15197
15198 #define LEA_SEARCH_THRESHOLD 12
15199
15200 /* Search backward for a non-agu definition of register number REGNO1
15201 or register number REGNO2 in INSN's basic block until we
15202 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15203 2. Reach the BB boundary, or
15204 3. Reach an agu definition.
15205 Return the distance between the non-agu definition point and INSN.
15206 If there is no definition point, return -1. */
15207
15208 static int
15209 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15210 rtx insn)
15211 {
15212 basic_block bb = BLOCK_FOR_INSN (insn);
15213 int distance = 0;
15214 df_ref *def_rec;
15215 enum attr_type insn_type;
15216
15217 if (insn != BB_HEAD (bb))
15218 {
15219 rtx prev = PREV_INSN (insn);
15220 while (prev && distance < LEA_SEARCH_THRESHOLD)
15221 {
15222 if (NONDEBUG_INSN_P (prev))
15223 {
15224 distance++;
15225 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15226 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15227 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15228 && (regno1 == DF_REF_REGNO (*def_rec)
15229 || regno2 == DF_REF_REGNO (*def_rec)))
15230 {
15231 insn_type = get_attr_type (prev);
15232 if (insn_type != TYPE_LEA)
15233 goto done;
15234 }
15235 }
15236 if (prev == BB_HEAD (bb))
15237 break;
15238 prev = PREV_INSN (prev);
15239 }
15240 }
15241
15242 if (distance < LEA_SEARCH_THRESHOLD)
15243 {
15244 edge e;
15245 edge_iterator ei;
15246 bool simple_loop = false;
15247
15248 FOR_EACH_EDGE (e, ei, bb->preds)
15249 if (e->src == bb)
15250 {
15251 simple_loop = true;
15252 break;
15253 }
15254
15255 if (simple_loop)
15256 {
15257 rtx prev = BB_END (bb);
15258 while (prev
15259 && prev != insn
15260 && distance < LEA_SEARCH_THRESHOLD)
15261 {
15262 if (NONDEBUG_INSN_P (prev))
15263 {
15264 distance++;
15265 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15266 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15267 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15268 && (regno1 == DF_REF_REGNO (*def_rec)
15269 || regno2 == DF_REF_REGNO (*def_rec)))
15270 {
15271 insn_type = get_attr_type (prev);
15272 if (insn_type != TYPE_LEA)
15273 goto done;
15274 }
15275 }
15276 prev = PREV_INSN (prev);
15277 }
15278 }
15279 }
15280
15281 distance = -1;
15282
15283 done:
15284 /* get_attr_type may modify recog data. We want to make sure
15285 that recog data is valid for instruction INSN, on which
15286 distance_non_agu_define is called. INSN is unchanged here. */
15287 extract_insn_cached (insn);
15288 return distance;
15289 }
15290
15291 /* Return the distance between INSN and the next insn that uses
15292 register number REGNO0 in a memory address. Return -1 if no such
15293 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
15294
15295 static int
15296 distance_agu_use (unsigned int regno0, rtx insn)
15297 {
15298 basic_block bb = BLOCK_FOR_INSN (insn);
15299 int distance = 0;
15300 df_ref *def_rec;
15301 df_ref *use_rec;
15302
15303 if (insn != BB_END (bb))
15304 {
15305 rtx next = NEXT_INSN (insn);
15306 while (next && distance < LEA_SEARCH_THRESHOLD)
15307 {
15308 if (NONDEBUG_INSN_P (next))
15309 {
15310 distance++;
15311
15312 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15313 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15314 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15315 && regno0 == DF_REF_REGNO (*use_rec))
15316 {
15317 /* Return DISTANCE if OP0 is used in memory
15318 address in NEXT. */
15319 return distance;
15320 }
15321
15322 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15323 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15324 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15325 && regno0 == DF_REF_REGNO (*def_rec))
15326 {
15327 /* Return -1 if OP0 is set in NEXT. */
15328 return -1;
15329 }
15330 }
15331 if (next == BB_END (bb))
15332 break;
15333 next = NEXT_INSN (next);
15334 }
15335 }
15336
15337 if (distance < LEA_SEARCH_THRESHOLD)
15338 {
15339 edge e;
15340 edge_iterator ei;
15341 bool simple_loop = false;
15342
15343 FOR_EACH_EDGE (e, ei, bb->succs)
15344 if (e->dest == bb)
15345 {
15346 simple_loop = true;
15347 break;
15348 }
15349
15350 if (simple_loop)
15351 {
15352 rtx next = BB_HEAD (bb);
15353 while (next
15354 && next != insn
15355 && distance < LEA_SEARCH_THRESHOLD)
15356 {
15357 if (NONDEBUG_INSN_P (next))
15358 {
15359 distance++;
15360
15361 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15362 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15363 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15364 && regno0 == DF_REF_REGNO (*use_rec))
15365 {
15366 /* Return DISTANCE if OP0 is used in memory
15367 address in NEXT. */
15368 return distance;
15369 }
15370
15371 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15372 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15373 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15374 && regno0 == DF_REF_REGNO (*def_rec))
15375 {
15376 /* Return -1 if OP0 is set in NEXT. */
15377 return -1;
15378 }
15379
15380 }
15381 next = NEXT_INSN (next);
15382 }
15383 }
15384 }
15385
15386 return -1;
15387 }
15388
15389 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
15390 there is a choice between LEA and ADD.
15391 Negative value: ADD is preferred over LEA
15392 Zero: neutral
15393 Positive value: LEA is preferred over ADD */
15394 #define IX86_LEA_PRIORITY 2
15395
15396 /* Return true if it is ok to optimize an ADD operation to an LEA
15397 operation to avoid flag register consumption. For most processors,
15398 ADD is faster than LEA. For processors like Atom, if the
15399 destination register of the LEA holds an actual address which will
15400 be used soon, LEA is better; otherwise ADD is better. */
15401
15402 bool
15403 ix86_lea_for_add_ok (rtx insn, rtx operands[])
15404 {
15405 unsigned int regno0 = true_regnum (operands[0]);
15406 unsigned int regno1 = true_regnum (operands[1]);
15407 unsigned int regno2 = true_regnum (operands[2]);
15408
15409 /* If a = b + c and a != b and a != c, we must use the lea form. */
15410 if (regno0 != regno1 && regno0 != regno2)
15411 return true;
15412
15413 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15414 return false;
15415 else
15416 {
15417 int dist_define, dist_use;
15418
15419 /* Return false if REGNO0 isn't used in a memory address. */
15420 dist_use = distance_agu_use (regno0, insn);
15421 if (dist_use <= 0)
15422 return false;
15423
15424 dist_define = distance_non_agu_define (regno1, regno2, insn);
15425 if (dist_define <= 0)
15426 return true;
15427
15428 /* If this insn has both a backward non-agu dependence and a forward
15429 agu dependence, the one with the shorter distance takes effect. */
15430 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
15431 return false;
15432
15433 return true;
15434 }
15435 }
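
/* Numeric illustration of the heuristic above (values are made up):
   if the inputs were last set by a non-AGU instruction 1 insn before
   (dist_define = 1) and the result feeds an address 5 insns later
   (dist_use = 5), then 1 + IX86_LEA_PRIORITY < 5 and ADD is chosen;
   with dist_define = 3 and dist_use = 4 the test fails and LEA is
   used instead.  */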
15436
15437 /* Return true if destination reg of SET_BODY is shift count of
15438 USE_BODY. */
15439
15440 static bool
15441 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15442 {
15443 rtx set_dest;
15444 rtx shift_rtx;
15445 int i;
15446
15447 /* Retrieve destination of SET_BODY. */
15448 switch (GET_CODE (set_body))
15449 {
15450 case SET:
15451 set_dest = SET_DEST (set_body);
15452 if (!set_dest || !REG_P (set_dest))
15453 return false;
15454 break;
15455 case PARALLEL:
15456 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
15457 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
15458 use_body))
15459 return true;
15460 default:
15461 return false;
15462 break;
15463 }
15464
15465 /* Retrieve shift count of USE_BODY. */
15466 switch (GET_CODE (use_body))
15467 {
15468 case SET:
15469 shift_rtx = XEXP (use_body, 1);
15470 break;
15471 case PARALLEL:
15472 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
15473 if (ix86_dep_by_shift_count_body (set_body,
15474 XVECEXP (use_body, 0, i)))
15475 return true;
15476 default:
15477 return false;
15478 break;
15479 }
15480
15481 if (shift_rtx
15482 && (GET_CODE (shift_rtx) == ASHIFT
15483 || GET_CODE (shift_rtx) == LSHIFTRT
15484 || GET_CODE (shift_rtx) == ASHIFTRT
15485 || GET_CODE (shift_rtx) == ROTATE
15486 || GET_CODE (shift_rtx) == ROTATERT))
15487 {
15488 rtx shift_count = XEXP (shift_rtx, 1);
15489
15490 /* Return true if shift count is dest of SET_BODY. */
15491 if (REG_P (shift_count)
15492 && true_regnum (set_dest) == true_regnum (shift_count))
15493 return true;
15494 }
15495
15496 return false;
15497 }
15498
15499 /* Return true if destination reg of SET_INSN is shift count of
15500 USE_INSN. */
15501
15502 bool
15503 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15504 {
15505 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15506 PATTERN (use_insn));
15507 }
15508
15509 /* Return TRUE or FALSE depending on whether the unary operator meets the
15510 appropriate constraints. */
15511
15512 bool
15513 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
15514 enum machine_mode mode ATTRIBUTE_UNUSED,
15515 rtx operands[2] ATTRIBUTE_UNUSED)
15516 {
15517 /* If one of the operands is memory, the source and destination must match. */
15518 if ((MEM_P (operands[0])
15519 || MEM_P (operands[1]))
15520 && ! rtx_equal_p (operands[0], operands[1]))
15521 return false;
15522 return true;
15523 }
15524
15525 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15526 are ok, keeping in mind the possible movddup alternative. */
15527
15528 bool
15529 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15530 {
15531 if (MEM_P (operands[0]))
15532 return rtx_equal_p (operands[0], operands[1 + high]);
15533 if (MEM_P (operands[1]) && MEM_P (operands[2]))
15534 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
15535 return true;
15536 }
15537
15538 /* Post-reload splitter for converting an SFmode or DFmode value in an
15539 SSE register into an unsigned SImode value. */
15540
15541 void
15542 ix86_split_convert_uns_si_sse (rtx operands[])
15543 {
15544 enum machine_mode vecmode;
15545 rtx value, large, zero_or_two31, input, two31, x;
15546
15547 large = operands[1];
15548 zero_or_two31 = operands[2];
15549 input = operands[3];
15550 two31 = operands[4];
15551 vecmode = GET_MODE (large);
15552 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
15553
15554 /* Load up the value into the low element. We must ensure that the other
15555 elements are valid floats -- zero is the easiest such value. */
15556 if (MEM_P (input))
15557 {
15558 if (vecmode == V4SFmode)
15559 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
15560 else
15561 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
15562 }
15563 else
15564 {
15565 input = gen_rtx_REG (vecmode, REGNO (input));
15566 emit_move_insn (value, CONST0_RTX (vecmode));
15567 if (vecmode == V4SFmode)
15568 emit_insn (gen_sse_movss (value, value, input));
15569 else
15570 emit_insn (gen_sse2_movsd (value, value, input));
15571 }
15572
15573 emit_move_insn (large, two31);
15574 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
15575
15576 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
15577 emit_insn (gen_rtx_SET (VOIDmode, large, x));
15578
15579 x = gen_rtx_AND (vecmode, zero_or_two31, large);
15580 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
15581
15582 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
15583 emit_insn (gen_rtx_SET (VOIDmode, value, x));
15584
15585 large = gen_rtx_REG (V4SImode, REGNO (large));
15586 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
15587
15588 x = gen_rtx_REG (V4SImode, REGNO (value));
15589 if (vecmode == V4SFmode)
15590 emit_insn (gen_sse2_cvttps2dq (x, value));
15591 else
15592 emit_insn (gen_sse2_cvttpd2dq (x, value));
15593 value = x;
15594
15595 emit_insn (gen_xorv4si3 (value, value, large));
15596 }
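
/* A worked example of the split above (values chosen so every step
   is exact): converting 3000000000.0 to unsigned SImode.

       large         = 0xffffffff        (2^31 <= 3000000000.0)
       zero_or_two31 = 2147483648.0      (two31 & large)
       value         = 3000000000.0 - 2147483648.0 = 852516352.0
       cvtt...2dq    -> 0x32d05e00       (852516352)
       large << 31   = 0x80000000
       xor           -> 0xb2d05e00       (3000000000)

   For inputs below 2^31 the mask is zero and the plain signed
   conversion result passes through unchanged.  */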
15597
15598 /* Convert an unsigned DImode value into a DFmode, using only SSE.
15599 Expects the 64-bit DImode to be supplied in a pair of integral
15600 registers. Requires SSE2; will use SSE3 if available. For x86_32,
15601 -mfpmath=sse, !optimize_size only. */
15602
15603 void
15604 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
15605 {
15606 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
15607 rtx int_xmm, fp_xmm;
15608 rtx biases, exponents;
15609 rtx x;
15610
15611 int_xmm = gen_reg_rtx (V4SImode);
15612 if (TARGET_INTER_UNIT_MOVES)
15613 emit_insn (gen_movdi_to_sse (int_xmm, input));
15614 else if (TARGET_SSE_SPLIT_REGS)
15615 {
15616 emit_clobber (int_xmm);
15617 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
15618 }
15619 else
15620 {
15621 x = gen_reg_rtx (V2DImode);
15622 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
15623 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
15624 }
15625
15626 x = gen_rtx_CONST_VECTOR (V4SImode,
15627 gen_rtvec (4, GEN_INT (0x43300000UL),
15628 GEN_INT (0x45300000UL),
15629 const0_rtx, const0_rtx));
15630 exponents = validize_mem (force_const_mem (V4SImode, x));
15631
15632 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
15633 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
15634
15635 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
15636 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
15637 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
15638 (0x1.0p84 + double(fp_value_hi_xmm)).
15639 Note these exponents differ by 32. */
15640
15641 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
15642
15643 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
15644 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
15645 real_ldexp (&bias_lo_rvt, &dconst1, 52);
15646 real_ldexp (&bias_hi_rvt, &dconst1, 84);
15647 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
15648 x = const_double_from_real_value (bias_hi_rvt, DFmode);
15649 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
15650 biases = validize_mem (force_const_mem (V2DFmode, biases));
15651 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
15652
15653 /* Add the upper and lower DFmode values together. */
15654 if (TARGET_SSE3)
15655 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
15656 else
15657 {
15658 x = copy_to_mode_reg (V2DFmode, fp_xmm);
15659 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
15660 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
15661 }
15662
15663 ix86_expand_vector_extract (false, target, fp_xmm, 0);
15664 }
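
/* A worked example of the bias trick above (chosen so all steps are
   exact): input = 2^33 + 7, i.e. high word 2, low word 7.

       low  ## 0x43300000  ->  0x1.0p52 + 7
       high ## 0x45300000  ->  0x1.0p84 + 2*2^32

   Subtracting the 0x1.0p52 and 0x1.0p84 biases leaves 7.0 and
   2*2^32 = 2^33, and the final add of the two halves produces
   8589934599.0 = 2^33 + 7.  */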
15665
15666 /* Not used, but eases macroization of patterns. */
15667 void
15668 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
15669 rtx input ATTRIBUTE_UNUSED)
15670 {
15671 gcc_unreachable ();
15672 }
15673
15674 /* Convert an unsigned SImode value into a DFmode. Only currently used
15675 for SSE, but applicable anywhere. */
15676
15677 void
15678 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
15679 {
15680 REAL_VALUE_TYPE TWO31r;
15681 rtx x, fp;
15682
15683 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
15684 NULL, 1, OPTAB_DIRECT);
15685
15686 fp = gen_reg_rtx (DFmode);
15687 emit_insn (gen_floatsidf2 (fp, x));
15688
15689 real_ldexp (&TWO31r, &dconst1, 31);
15690 x = const_double_from_real_value (TWO31r, DFmode);
15691
15692 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
15693 if (x != target)
15694 emit_move_insn (target, x);
15695 }
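
/* Worked example of the expansion above: input 0xffffffff
   (4294967295 unsigned).  Adding -2^31 wraps the SImode value to
   0x7fffffff, the signed conversion yields 2147483647.0, and adding
   0x1.0p31 back gives 4294967295.0.  For a small input such as 5 the
   biased value is -2147483643, and -2147483643.0 + 2^31 = 5.0.  */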
15696
15697 /* Convert a signed DImode value into a DFmode. Only used for SSE in
15698 32-bit mode; otherwise we have a direct convert instruction. */
15699
15700 void
15701 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
15702 {
15703 REAL_VALUE_TYPE TWO32r;
15704 rtx fp_lo, fp_hi, x;
15705
15706 fp_lo = gen_reg_rtx (DFmode);
15707 fp_hi = gen_reg_rtx (DFmode);
15708
15709 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
15710
15711 real_ldexp (&TWO32r, &dconst1, 32);
15712 x = const_double_from_real_value (TWO32r, DFmode);
15713 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
15714
15715 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
15716
15717 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
15718 0, OPTAB_DIRECT);
15719 if (x != target)
15720 emit_move_insn (target, x);
15721 }
15722
15723 /* Convert an unsigned SImode value into a SFmode, using only SSE.
15724 For x86_32, -mfpmath=sse, !optimize_size only. */
15725 void
15726 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
15727 {
15728 REAL_VALUE_TYPE ONE16r;
15729 rtx fp_hi, fp_lo, int_hi, int_lo, x;
15730
15731 real_ldexp (&ONE16r, &dconst1, 16);
15732 x = const_double_from_real_value (ONE16r, SFmode);
15733 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
15734 NULL, 0, OPTAB_DIRECT);
15735 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
15736 NULL, 0, OPTAB_DIRECT);
15737 fp_hi = gen_reg_rtx (SFmode);
15738 fp_lo = gen_reg_rtx (SFmode);
15739 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
15740 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
15741 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
15742 0, OPTAB_DIRECT);
15743 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
15744 0, OPTAB_DIRECT);
15745 if (!rtx_equal_p (target, fp_hi))
15746 emit_move_insn (target, fp_hi);
15747 }
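
/* The idea of the split above, in short: the unsigned input u is
   decomposed as u = hi * 2^16 + lo with hi = u >> 16 and
   lo = u & 0xffff.  Both halves fit in 16 bits, so each signed
   int->float conversion is exact, hi * 0x1.0p16 is exact, and the
   final addition introduces at most one rounding -- the same as a
   direct correctly-rounded unsigned conversion would.  */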
15748
15749 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15750 then replicate the value for all elements of the vector
15751 register. */
15752
15753 rtx
15754 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
15755 {
15756 rtvec v;
15757 switch (mode)
15758 {
15759 case SImode:
15760 gcc_assert (vect);
15761 v = gen_rtvec (4, value, value, value, value);
15762 return gen_rtx_CONST_VECTOR (V4SImode, v);
15763
15764 case DImode:
15765 gcc_assert (vect);
15766 v = gen_rtvec (2, value, value);
15767 return gen_rtx_CONST_VECTOR (V2DImode, v);
15768
15769 case SFmode:
15770 if (vect)
15771 v = gen_rtvec (4, value, value, value, value);
15772 else
15773 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
15774 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15775 return gen_rtx_CONST_VECTOR (V4SFmode, v);
15776
15777 case DFmode:
15778 if (vect)
15779 v = gen_rtvec (2, value, value);
15780 else
15781 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
15782 return gen_rtx_CONST_VECTOR (V2DFmode, v);
15783
15784 default:
15785 gcc_unreachable ();
15786 }
15787 }
15788
15789 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15790 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15791 for an SSE register. If VECT is true, then replicate the mask for
15792 all elements of the vector register. If INVERT is true, then create
15793 a mask excluding the sign bit. */
15794
15795 rtx
15796 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15797 {
15798 enum machine_mode vec_mode, imode;
15799 HOST_WIDE_INT hi, lo;
15800 int shift = 63;
15801 rtx v;
15802 rtx mask;
15803
15804 /* Find the sign bit, sign extended to 2*HWI. */
15805 switch (mode)
15806 {
15807 case SImode:
15808 case SFmode:
15809 imode = SImode;
15810 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
15811 lo = 0x80000000, hi = lo < 0;
15812 break;
15813
15814 case DImode:
15815 case DFmode:
15816 imode = DImode;
15817 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
15818 if (HOST_BITS_PER_WIDE_INT >= 64)
15819 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15820 else
15821 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15822 break;
15823
15824 case TImode:
15825 case TFmode:
15826 vec_mode = VOIDmode;
15827 if (HOST_BITS_PER_WIDE_INT >= 64)
15828 {
15829 imode = TImode;
15830 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15831 }
15832 else
15833 {
15834 rtvec vec;
15835
15836 imode = DImode;
15837 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15838
15839 if (invert)
15840 {
15841 lo = ~lo, hi = ~hi;
15842 v = constm1_rtx;
15843 }
15844 else
15845 v = const0_rtx;
15846
15847 mask = immed_double_const (lo, hi, imode);
15848
15849 vec = gen_rtvec (2, v, mask);
15850 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15851 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15852
15853 return v;
15854 }
15855 break;
15856
15857 default:
15858 gcc_unreachable ();
15859 }
15860
15861 if (invert)
15862 lo = ~lo, hi = ~hi;
15863
15864 /* Force this value into the low part of a fp vector constant. */
15865 mask = immed_double_const (lo, hi, imode);
15866 mask = gen_lowpart (mode, mask);
15867
15868 if (vec_mode == VOIDmode)
15869 return force_reg (mode, mask);
15870
15871 v = ix86_build_const_vector (mode, vect, mask);
15872 return force_reg (vec_mode, v);
15873 }
15874
15875 /* Generate code for floating point ABS or NEG. */
15876
15877 void
15878 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15879 rtx operands[])
15880 {
15881 rtx mask, set, use, clob, dst, src;
15882 bool use_sse = false;
15883 bool vector_mode = VECTOR_MODE_P (mode);
15884 enum machine_mode elt_mode = mode;
15885
15886 if (vector_mode)
15887 {
15888 elt_mode = GET_MODE_INNER (mode);
15889 use_sse = true;
15890 }
15891 else if (mode == TFmode)
15892 use_sse = true;
15893 else if (TARGET_SSE_MATH)
15894 use_sse = SSE_FLOAT_MODE_P (mode);
15895
15896 /* NEG and ABS performed with SSE use bitwise mask operations.
15897 Create the appropriate mask now. */
15898 if (use_sse)
15899 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
15900 else
15901 mask = NULL_RTX;
15902
15903 dst = operands[0];
15904 src = operands[1];
15905
15906 if (vector_mode)
15907 {
15908 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15909 set = gen_rtx_SET (VOIDmode, dst, set);
15910 emit_insn (set);
15911 }
15912 else
15913 {
15914 set = gen_rtx_fmt_e (code, mode, src);
15915 set = gen_rtx_SET (VOIDmode, dst, set);
15916 if (mask)
15917 {
15918 use = gen_rtx_USE (VOIDmode, mask);
15919 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15920 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15921 gen_rtvec (3, set, use, clob)));
15922 }
15923 else
15924 emit_insn (set);
15925 }
15926 }
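
/* Concretely (a sketch of the SSE case): for DFmode, NEG is emitted
   as an XOR with the sign-bit mask 0x8000000000000000 in each
   element (xorpd), while ABS is an AND with the inverted mask
   0x7fffffffffffffff (andpd); the SFmode masks are 0x80000000 and
   0x7fffffff with xorps/andps.  */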
15927
15928 /* Expand a copysign operation. Special case operand 0 being a constant. */
15929
15930 void
15931 ix86_expand_copysign (rtx operands[])
15932 {
15933 enum machine_mode mode;
15934 rtx dest, op0, op1, mask, nmask;
15935
15936 dest = operands[0];
15937 op0 = operands[1];
15938 op1 = operands[2];
15939
15940 mode = GET_MODE (dest);
15941
15942 if (GET_CODE (op0) == CONST_DOUBLE)
15943 {
15944 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15945
15946 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15947 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15948
15949 if (mode == SFmode || mode == DFmode)
15950 {
15951 enum machine_mode vmode;
15952
15953 vmode = mode == SFmode ? V4SFmode : V2DFmode;
15954
15955 if (op0 == CONST0_RTX (mode))
15956 op0 = CONST0_RTX (vmode);
15957 else
15958 {
15959 rtx v = ix86_build_const_vector (mode, false, op0);
15960
15961 op0 = force_reg (vmode, v);
15962 }
15963 }
15964 else if (op0 != CONST0_RTX (mode))
15965 op0 = force_reg (mode, op0);
15966
15967 mask = ix86_build_signbit_mask (mode, 0, 0);
15968
15969 if (mode == SFmode)
15970 copysign_insn = gen_copysignsf3_const;
15971 else if (mode == DFmode)
15972 copysign_insn = gen_copysigndf3_const;
15973 else
15974 copysign_insn = gen_copysigntf3_const;
15975
15976 emit_insn (copysign_insn (dest, op0, op1, mask));
15977 }
15978 else
15979 {
15980 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
15981
15982 nmask = ix86_build_signbit_mask (mode, 0, 1);
15983 mask = ix86_build_signbit_mask (mode, 0, 0);
15984
15985 if (mode == SFmode)
15986 copysign_insn = gen_copysignsf3_var;
15987 else if (mode == DFmode)
15988 copysign_insn = gen_copysigndf3_var;
15989 else
15990 copysign_insn = gen_copysigntf3_var;
15991
15992 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
15993 }
15994 }
15995
15996 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
15997 be a constant, and so has already been expanded into a vector constant. */
15998
15999 void
16000 ix86_split_copysign_const (rtx operands[])
16001 {
16002 enum machine_mode mode, vmode;
16003 rtx dest, op0, mask, x;
16004
16005 dest = operands[0];
16006 op0 = operands[1];
16007 mask = operands[3];
16008
16009 mode = GET_MODE (dest);
16010 vmode = GET_MODE (mask);
16011
16012 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16013 x = gen_rtx_AND (vmode, dest, mask);
16014 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16015
16016 if (op0 != CONST0_RTX (vmode))
16017 {
16018 x = gen_rtx_IOR (vmode, dest, op0);
16019 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16020 }
16021 }
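
/* Illustration of the mask arithmetic above (a sketch): for
   copysign (3.0, x) in DFmode, op0 has already been expanded to the
   vector constant { 3.0, 0.0 } and mask to the sign-bit vector
   { 0x8000000000000000, 0 }.  The AND keeps only the sign bit of x
   in dest, and the IOR merges in the magnitude 3.0, producing +3.0
   or -3.0 according to the sign of x.  */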
16022
16023 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16024 so we have to do two masks. */
16025
16026 void
16027 ix86_split_copysign_var (rtx operands[])
16028 {
16029 enum machine_mode mode, vmode;
16030 rtx dest, scratch, op0, op1, mask, nmask, x;
16031
16032 dest = operands[0];
16033 scratch = operands[1];
16034 op0 = operands[2];
16035 op1 = operands[3];
16036 nmask = operands[4];
16037 mask = operands[5];
16038
16039 mode = GET_MODE (dest);
16040 vmode = GET_MODE (mask);
16041
16042 if (rtx_equal_p (op0, op1))
16043 {
16044 /* Shouldn't happen often (it's useless, obviously), but when it does
16045 we'd generate incorrect code if we continue below. */
16046 emit_move_insn (dest, op0);
16047 return;
16048 }
16049
16050 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16051 {
16052 gcc_assert (REGNO (op1) == REGNO (scratch));
16053
16054 x = gen_rtx_AND (vmode, scratch, mask);
16055 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16056
16057 dest = mask;
16058 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16059 x = gen_rtx_NOT (vmode, dest);
16060 x = gen_rtx_AND (vmode, x, op0);
16061 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16062 }
16063 else
16064 {
16065 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16066 {
16067 x = gen_rtx_AND (vmode, scratch, mask);
16068 }
16069 else /* alternative 2,4 */
16070 {
16071 gcc_assert (REGNO (mask) == REGNO (scratch));
16072 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16073 x = gen_rtx_AND (vmode, scratch, op1);
16074 }
16075 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16076
16077 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16078 {
16079 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16080 x = gen_rtx_AND (vmode, dest, nmask);
16081 }
16082 else /* alternative 3,4 */
16083 {
16084 gcc_assert (REGNO (nmask) == REGNO (dest));
16085 dest = nmask;
16086 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16087 x = gen_rtx_AND (vmode, dest, op0);
16088 }
16089 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16090 }
16091
16092 x = gen_rtx_IOR (vmode, dest, scratch);
16093 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16094 }
16095
16096 /* Return TRUE or FALSE depending on whether the first SET in INSN
16097 has source and destination with matching CC modes and whether the
16098 CC mode is at least as constrained as REQ_MODE. */
16099
16100 bool
16101 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16102 {
16103 rtx set;
16104 enum machine_mode set_mode;
16105
16106 set = PATTERN (insn);
16107 if (GET_CODE (set) == PARALLEL)
16108 set = XVECEXP (set, 0, 0);
16109 gcc_assert (GET_CODE (set) == SET);
16110 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16111
16112 set_mode = GET_MODE (SET_DEST (set));
16113 switch (set_mode)
16114 {
16115 case CCNOmode:
16116 if (req_mode != CCNOmode
16117 && (req_mode != CCmode
16118 || XEXP (SET_SRC (set), 1) != const0_rtx))
16119 return false;
16120 break;
16121 case CCmode:
16122 if (req_mode == CCGCmode)
16123 return false;
16124 /* FALLTHRU */
16125 case CCGCmode:
16126 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16127 return false;
16128 /* FALLTHRU */
16129 case CCGOCmode:
16130 if (req_mode == CCZmode)
16131 return false;
16132 /* FALLTHRU */
16133 case CCAmode:
16134 case CCCmode:
16135 case CCOmode:
16136 case CCSmode:
16137 case CCZmode:
16138 break;
16139
16140 default:
16141 gcc_unreachable ();
16142 }
16143
16144 return GET_MODE (SET_SRC (set)) == set_mode;
16145 }
16146
16147 /* Generate insn patterns to do an integer compare of OPERANDS. */
16148
16149 static rtx
16150 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16151 {
16152 enum machine_mode cmpmode;
16153 rtx tmp, flags;
16154
16155 cmpmode = SELECT_CC_MODE (code, op0, op1);
16156 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16157
16158 /* This is very simple, but making the interface the same as in the
16159 FP case makes the rest of the code easier. */
16160 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16161 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16162
16163 /* Return the test that should be put into the flags user, i.e.
16164 the bcc, scc, or cmov instruction. */
16165 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16166 }
16167
16168 /* Figure out whether to use ordered or unordered fp comparisons.
16169 Return the appropriate mode to use. */
16170
16171 enum machine_mode
16172 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16173 {
16174 /* ??? In order to make all comparisons reversible, we do all comparisons
16175 non-trapping when compiling for IEEE. Once gcc is able to distinguish
16176 all forms of trapping and nontrapping comparisons, we can make inequality
16177 comparisons trapping again, since that results in better code when using
16178 FCOM based compares. */
16179 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16180 }
16181
16182 enum machine_mode
16183 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16184 {
16185 enum machine_mode mode = GET_MODE (op0);
16186
16187 if (SCALAR_FLOAT_MODE_P (mode))
16188 {
16189 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16190 return ix86_fp_compare_mode (code);
16191 }
16192
16193 switch (code)
16194 {
16195 /* Only zero flag is needed. */
16196 case EQ: /* ZF=0 */
16197 case NE: /* ZF!=0 */
16198 return CCZmode;
16199 /* Codes needing carry flag. */
16200 case GEU: /* CF=0 */
16201 case LTU: /* CF=1 */
16202 /* Detect overflow checks. They need just the carry flag. */
16203 if (GET_CODE (op0) == PLUS
16204 && rtx_equal_p (op1, XEXP (op0, 0)))
16205 return CCCmode;
16206 else
16207 return CCmode;
16208 case GTU: /* CF=0 & ZF=0 */
16209 case LEU: /* CF=1 | ZF=1 */
16210 /* Detect overflow checks. They need just the carry flag. */
16211 if (GET_CODE (op0) == MINUS
16212 && rtx_equal_p (op1, XEXP (op0, 0)))
16213 return CCCmode;
16214 else
16215 return CCmode;
16216 /* Codes possibly doable only with sign flag when
16217 comparing against zero. */
16218 case GE: /* SF=OF or SF=0 */
16219 case LT: /* SF<>OF or SF=1 */
16220 if (op1 == const0_rtx)
16221 return CCGOCmode;
16222 else
16223 /* For other cases Carry flag is not required. */
16224 return CCGCmode;
16225 /* Codes doable only with the sign flag when comparing
16226 against zero, but we miss the jump instruction for it
16227 so we need to use relational tests against the overflow
16228 flag, which thus needs to be zero. */
16229 case GT: /* ZF=0 & SF=OF */
16230 case LE: /* ZF=1 | SF<>OF */
16231 if (op1 == const0_rtx)
16232 return CCNOmode;
16233 else
16234 return CCGCmode;
16235 /* The strcmp pattern does (use flags), and combine may ask us for the
16236 proper mode. */
16237 case USE:
16238 return CCmode;
16239 default:
16240 gcc_unreachable ();
16241 }
16242 }
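
/* Example of the overflow-check detection above: for an unsigned
   overflow test such as "if (a + b < a)", the comparison reaching
   here has op0 = (plus a b) and op1 = a with code LTU, so CCCmode is
   chosen and only the carry flag produced by the addition itself is
   needed; no separate compare instruction is required.  */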
16243
16244 /* Return the fixed registers used for condition codes. */
16245
16246 static bool
16247 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16248 {
16249 *p1 = FLAGS_REG;
16250 *p2 = FPSR_REG;
16251 return true;
16252 }
16253
16254 /* If two condition code modes are compatible, return a condition code
16255 mode which is compatible with both. Otherwise, return
16256 VOIDmode. */
16257
16258 static enum machine_mode
16259 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
16260 {
16261 if (m1 == m2)
16262 return m1;
16263
16264 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16265 return VOIDmode;
16266
16267 if ((m1 == CCGCmode && m2 == CCGOCmode)
16268 || (m1 == CCGOCmode && m2 == CCGCmode))
16269 return CCGCmode;
16270
16271 switch (m1)
16272 {
16273 default:
16274 gcc_unreachable ();
16275
16276 case CCmode:
16277 case CCGCmode:
16278 case CCGOCmode:
16279 case CCNOmode:
16280 case CCAmode:
16281 case CCCmode:
16282 case CCOmode:
16283 case CCSmode:
16284 case CCZmode:
16285 switch (m2)
16286 {
16287 default:
16288 return VOIDmode;
16289
16290 case CCmode:
16291 case CCGCmode:
16292 case CCGOCmode:
16293 case CCNOmode:
16294 case CCAmode:
16295 case CCCmode:
16296 case CCOmode:
16297 case CCSmode:
16298 case CCZmode:
16299 return CCmode;
16300 }
16301
16302 case CCFPmode:
16303 case CCFPUmode:
16304 /* These are only compatible with themselves, which we already
16305 checked above. */
16306 return VOIDmode;
16307 }
16308 }
16309
16310
16311 /* Return a comparison we can do that is equivalent to
16312 swap_condition (code), apart possibly from orderedness.
16313 But never change orderedness if TARGET_IEEE_FP, returning
16314 UNKNOWN in that case if necessary. */
16315
16316 static enum rtx_code
16317 ix86_fp_swap_condition (enum rtx_code code)
16318 {
16319 switch (code)
16320 {
16321 case GT: /* GTU - CF=0 & ZF=0 */
16322 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
16323 case GE: /* GEU - CF=0 */
16324 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
16325 case UNLT: /* LTU - CF=1 */
16326 return TARGET_IEEE_FP ? UNKNOWN : GT;
16327 case UNLE: /* LEU - CF=1 | ZF=1 */
16328 return TARGET_IEEE_FP ? UNKNOWN : GE;
16329 default:
16330 return swap_condition (code);
16331 }
16332 }
16333
16334 /* Return the cost of comparison CODE using the best strategy for performance.
16335 All following functions use the number of instructions as the cost metric.
16336 In the future this should be tweaked to compute bytes for optimize_size and
16337 to take into account the performance of various instructions on various CPUs. */
16338
16339 static int
16340 ix86_fp_comparison_cost (enum rtx_code code)
16341 {
16342 int arith_cost;
16343
16344 /* The cost of code using bit-twiddling on %ah. */
16345 switch (code)
16346 {
16347 case UNLE:
16348 case UNLT:
16349 case LTGT:
16350 case GT:
16351 case GE:
16352 case UNORDERED:
16353 case ORDERED:
16354 case UNEQ:
16355 arith_cost = 4;
16356 break;
16357 case LT:
16358 case NE:
16359 case EQ:
16360 case UNGE:
16361 arith_cost = TARGET_IEEE_FP ? 5 : 4;
16362 break;
16363 case LE:
16364 case UNGT:
16365 arith_cost = TARGET_IEEE_FP ? 6 : 4;
16366 break;
16367 default:
16368 gcc_unreachable ();
16369 }
16370
16371 switch (ix86_fp_comparison_strategy (code))
16372 {
16373 case IX86_FPCMP_COMI:
16374 return arith_cost > 4 ? 3 : 2;
16375 case IX86_FPCMP_SAHF:
16376 return arith_cost > 4 ? 4 : 3;
16377 default:
16378 return arith_cost;
16379 }
16380 }
16381
16382 /* Return the strategy to use for floating-point comparisons. We assume that
16383 fcomi is always preferable where available, since that is also true when
16384 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16385
16386 enum ix86_fpcmp_strategy
16387 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
16388 {
16389 /* Do fcomi/sahf based test when profitable. */
16390
16391 if (TARGET_CMOVE)
16392 return IX86_FPCMP_COMI;
16393
16394 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
16395 return IX86_FPCMP_SAHF;
16396
16397 return IX86_FPCMP_ARITH;
16398 }
16399
16400 /* Swap, force into registers, or otherwise massage the two operands
16401 to a fp comparison. The operands are updated in place; the new
16402 comparison code is returned. */
16403
16404 static enum rtx_code
16405 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
16406 {
16407 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
16408 rtx op0 = *pop0, op1 = *pop1;
16409 enum machine_mode op_mode = GET_MODE (op0);
16410 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
16411
16412 /* All of the unordered compare instructions only work on registers.
16413 The same is true of the fcomi compare instructions. The XFmode
16414 compare instructions require registers except when comparing
16415 against zero or when converting operand 1 from fixed point to
16416 floating point. */
16417
16418 if (!is_sse
16419 && (fpcmp_mode == CCFPUmode
16420 || (op_mode == XFmode
16421 && ! (standard_80387_constant_p (op0) == 1
16422 || standard_80387_constant_p (op1) == 1)
16423 && GET_CODE (op1) != FLOAT)
16424 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
16425 {
16426 op0 = force_reg (op_mode, op0);
16427 op1 = force_reg (op_mode, op1);
16428 }
16429 else
16430 {
16431 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
16432 things around if they appear profitable, otherwise force op0
16433 into a register. */
16434
16435 if (standard_80387_constant_p (op0) == 0
16436 || (MEM_P (op0)
16437 && ! (standard_80387_constant_p (op1) == 0
16438 || MEM_P (op1))))
16439 {
16440 enum rtx_code new_code = ix86_fp_swap_condition (code);
16441 if (new_code != UNKNOWN)
16442 {
16443 rtx tmp;
16444 tmp = op0, op0 = op1, op1 = tmp;
16445 code = new_code;
16446 }
16447 }
16448
16449 if (!REG_P (op0))
16450 op0 = force_reg (op_mode, op0);
16451
16452 if (CONSTANT_P (op1))
16453 {
16454 int tmp = standard_80387_constant_p (op1);
16455 if (tmp == 0)
16456 op1 = validize_mem (force_const_mem (op_mode, op1));
16457 else if (tmp == 1)
16458 {
16459 if (TARGET_CMOVE)
16460 op1 = force_reg (op_mode, op1);
16461 }
16462 else
16463 op1 = force_reg (op_mode, op1);
16464 }
16465 }
16466
16467 /* Try to rearrange the comparison to make it cheaper. */
16468 if (ix86_fp_comparison_cost (code)
16469 > ix86_fp_comparison_cost (swap_condition (code))
16470 && (REG_P (op1) || can_create_pseudo_p ()))
16471 {
16472 rtx tmp;
16473 tmp = op0, op0 = op1, op1 = tmp;
16474 code = swap_condition (code);
16475 if (!REG_P (op0))
16476 op0 = force_reg (op_mode, op0);
16477 }
16478
16479 *pop0 = op0;
16480 *pop1 = op1;
16481 return code;
16482 }
16483
16484 /* Convert a comparison code we use to represent an FP comparison to the
16485 integer code that will result in a proper branch. Return UNKNOWN if no
16486 such code is available. */
16487
16488 enum rtx_code
16489 ix86_fp_compare_code_to_integer (enum rtx_code code)
16490 {
16491 switch (code)
16492 {
16493 case GT:
16494 return GTU;
16495 case GE:
16496 return GEU;
16497 case ORDERED:
16498 case UNORDERED:
16499 return code;
16500 break;
16501 case UNEQ:
16502 return EQ;
16503 break;
16504 case UNLT:
16505 return LTU;
16506 break;
16507 case UNLE:
16508 return LEU;
16509 break;
16510 case LTGT:
16511 return NE;
16512 break;
16513 default:
16514 return UNKNOWN;
16515 }
16516 }
16517
16518 /* Generate insn patterns to do a floating point compare of OPERANDS. */
16519
16520 static rtx
16521 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
16522 {
16523 enum machine_mode fpcmp_mode, intcmp_mode;
16524 rtx tmp, tmp2;
16525
16526 fpcmp_mode = ix86_fp_compare_mode (code);
16527 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
16528
16529 /* Do fcomi/sahf based test when profitable. */
16530 switch (ix86_fp_comparison_strategy (code))
16531 {
16532 case IX86_FPCMP_COMI:
16533 intcmp_mode = fpcmp_mode;
16534 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16535 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16536 tmp);
16537 emit_insn (tmp);
16538 break;
16539
16540 case IX86_FPCMP_SAHF:
16541 intcmp_mode = fpcmp_mode;
16542 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16543 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16544 tmp);
16545
16546 if (!scratch)
16547 scratch = gen_reg_rtx (HImode);
16548 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
16549 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
16550 break;
16551
16552 case IX86_FPCMP_ARITH:
16553 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
16554 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16555 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
16556 if (!scratch)
16557 scratch = gen_reg_rtx (HImode);
16558 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
16559
16560 /* In the unordered case, we have to check C2 for NaNs, which
16561 doesn't happen to work out to anything nice combination-wise.
16562 So do some bit twiddling on the value we've got in AH to come
16563 up with an appropriate set of condition codes. */
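/* After fnstsw the high byte of the scratch (the AH value) holds the x87
   status flags: C0 in bit 0 (0x01), C2 in bit 2 (0x04) and C3 in bit 6
   (0x40); the masks below (0x45, 0x44, 0x40, 0x05, 0x04, 0x01) test
   combinations of these bits. */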
16564
16565 intcmp_mode = CCNOmode;
16566 switch (code)
16567 {
16568 case GT:
16569 case UNGT:
16570 if (code == GT || !TARGET_IEEE_FP)
16571 {
16572 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16573 code = EQ;
16574 }
16575 else
16576 {
16577 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16578 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16579 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
16580 intcmp_mode = CCmode;
16581 code = GEU;
16582 }
16583 break;
16584 case LT:
16585 case UNLT:
16586 if (code == LT && TARGET_IEEE_FP)
16587 {
16588 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16589 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
16590 intcmp_mode = CCmode;
16591 code = EQ;
16592 }
16593 else
16594 {
16595 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
16596 code = NE;
16597 }
16598 break;
16599 case GE:
16600 case UNGE:
16601 if (code == GE || !TARGET_IEEE_FP)
16602 {
16603 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
16604 code = EQ;
16605 }
16606 else
16607 {
16608 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16609 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
16610 code = NE;
16611 }
16612 break;
16613 case LE:
16614 case UNLE:
16615 if (code == LE && TARGET_IEEE_FP)
16616 {
16617 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16618 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16619 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16620 intcmp_mode = CCmode;
16621 code = LTU;
16622 }
16623 else
16624 {
16625 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16626 code = NE;
16627 }
16628 break;
16629 case EQ:
16630 case UNEQ:
16631 if (code == EQ && TARGET_IEEE_FP)
16632 {
16633 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16634 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16635 intcmp_mode = CCmode;
16636 code = EQ;
16637 }
16638 else
16639 {
16640 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16641 code = NE;
16642 }
16643 break;
16644 case NE:
16645 case LTGT:
16646 if (code == NE && TARGET_IEEE_FP)
16647 {
16648 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16649 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
16650 GEN_INT (0x40)));
16651 code = NE;
16652 }
16653 else
16654 {
16655 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16656 code = EQ;
16657 }
16658 break;
16659
16660 case UNORDERED:
16661 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16662 code = NE;
16663 break;
16664 case ORDERED:
16665 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16666 code = EQ;
16667 break;
16668
16669 default:
16670 gcc_unreachable ();
16671 }
16672 break;
16673
16674 default:
16675 gcc_unreachable();
16676 }
16677
16678 /* Return the test that should be put into the flags user, i.e.
16679 the bcc, scc, or cmov instruction. */
16680 return gen_rtx_fmt_ee (code, VOIDmode,
16681 gen_rtx_REG (intcmp_mode, FLAGS_REG),
16682 const0_rtx);
16683 }
16684
16685 static rtx
16686 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
16687 {
16688 rtx ret;
16689
16690 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
16691 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
16692
16693 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
16694 {
16695 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
16696 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16697 }
16698 else
16699 ret = ix86_expand_int_compare (code, op0, op1);
16700
16701 return ret;
16702 }
16703
16704 void
16705 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
16706 {
16707 enum machine_mode mode = GET_MODE (op0);
16708 rtx tmp;
16709
16710 switch (mode)
16711 {
16712 case SFmode:
16713 case DFmode:
16714 case XFmode:
16715 case QImode:
16716 case HImode:
16717 case SImode:
16718 simple:
16719 tmp = ix86_expand_compare (code, op0, op1);
16720 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16721 gen_rtx_LABEL_REF (VOIDmode, label),
16722 pc_rtx);
16723 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16724 return;
16725
16726 case DImode:
16727 if (TARGET_64BIT)
16728 goto simple;
16729 case TImode:
16730 /* Expand a double-word (DImode/TImode) branch into multiple compare+branch. */
16731 {
16732 rtx lo[2], hi[2], label2;
16733 enum rtx_code code1, code2, code3;
16734 enum machine_mode submode;
16735
16736 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
16737 {
16738 tmp = op0, op0 = op1, op1 = tmp;
16739 code = swap_condition (code);
16740 }
16741
16742 split_double_mode (mode, &op0, 1, lo+0, hi+0);
16743 split_double_mode (mode, &op1, 1, lo+1, hi+1);
16744
16745 submode = mode == DImode ? SImode : DImode;
16746
16747 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
16748 avoid two branches. This costs one extra insn, so disable when
16749 optimizing for size. */
16750
16751 if ((code == EQ || code == NE)
16752 && (!optimize_insn_for_size_p ()
16753 || hi[1] == const0_rtx || lo[1] == const0_rtx))
16754 {
16755 rtx xor0, xor1;
16756
16757 xor1 = hi[0];
16758 if (hi[1] != const0_rtx)
16759 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
16760 NULL_RTX, 0, OPTAB_WIDEN);
16761
16762 xor0 = lo[0];
16763 if (lo[1] != const0_rtx)
16764 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
16765 NULL_RTX, 0, OPTAB_WIDEN);
16766
16767 tmp = expand_binop (submode, ior_optab, xor1, xor0,
16768 NULL_RTX, 0, OPTAB_WIDEN);
16769
16770 ix86_expand_branch (code, tmp, const0_rtx, label);
16771 return;
16772 }
16773
16774 /* Otherwise, if we are doing a less-than or greater-or-equal-than
16775 comparison, op1 is a constant and the low word is zero, then we can
16776 just examine the high word. Similarly for a low word of -1 and
16777 less-or-equal-than or greater-than. */
16778
16779 if (CONST_INT_P (hi[1]))
16780 switch (code)
16781 {
16782 case LT: case LTU: case GE: case GEU:
16783 if (lo[1] == const0_rtx)
16784 {
16785 ix86_expand_branch (code, hi[0], hi[1], label);
16786 return;
16787 }
16788 break;
16789 case LE: case LEU: case GT: case GTU:
16790 if (lo[1] == constm1_rtx)
16791 {
16792 ix86_expand_branch (code, hi[0], hi[1], label);
16793 return;
16794 }
16795 break;
16796 default:
16797 break;
16798 }
16799
16800 /* Otherwise, we need two or three jumps. */
16801
16802 label2 = gen_label_rtx ();
16803
16804 code1 = code;
16805 code2 = swap_condition (code);
16806 code3 = unsigned_condition (code);
16807
16808 switch (code)
16809 {
16810 case LT: case GT: case LTU: case GTU:
16811 break;
16812
16813 case LE: code1 = LT; code2 = GT; break;
16814 case GE: code1 = GT; code2 = LT; break;
16815 case LEU: code1 = LTU; code2 = GTU; break;
16816 case GEU: code1 = GTU; code2 = LTU; break;
16817
16818 case EQ: code1 = UNKNOWN; code2 = NE; break;
16819 case NE: code2 = UNKNOWN; break;
16820
16821 default:
16822 gcc_unreachable ();
16823 }
16824
16825 /*
16826 * a < b =>
16827 * if (hi(a) < hi(b)) goto true;
16828 * if (hi(a) > hi(b)) goto false;
16829 * if (lo(a) < lo(b)) goto true;
16830 * false:
16831 */
16832
16833 if (code1 != UNKNOWN)
16834 ix86_expand_branch (code1, hi[0], hi[1], label);
16835 if (code2 != UNKNOWN)
16836 ix86_expand_branch (code2, hi[0], hi[1], label2);
16837
16838 ix86_expand_branch (code3, lo[0], lo[1], label);
16839
16840 if (code2 != UNKNOWN)
16841 emit_label (label2);
16842 return;
16843 }
16844
16845 default:
16846 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
16847 goto simple;
16848 }
16849 }
16850
16851 /* Split branch based on floating point condition. */
16852 void
16853 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16854 rtx target1, rtx target2, rtx tmp, rtx pushed)
16855 {
16856 rtx condition;
16857 rtx i;
16858
16859 if (target2 != pc_rtx)
16860 {
16861 rtx tmp = target2;
16862 code = reverse_condition_maybe_unordered (code);
16863 target2 = target1;
16864 target1 = tmp;
16865 }
16866
16867 condition = ix86_expand_fp_compare (code, op1, op2,
16868 tmp);
16869
16870 /* Remove pushed operand from stack. */
16871 if (pushed)
16872 ix86_free_from_memory (GET_MODE (pushed));
16873
16874 i = emit_jump_insn (gen_rtx_SET
16875 (VOIDmode, pc_rtx,
16876 gen_rtx_IF_THEN_ELSE (VOIDmode,
16877 condition, target1, target2)));
16878 if (split_branch_probability >= 0)
16879 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16880 }
16881
16882 void
16883 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16884 {
16885 rtx ret;
16886
16887 gcc_assert (GET_MODE (dest) == QImode);
16888
16889 ret = ix86_expand_compare (code, op0, op1);
16890 PUT_MODE (ret, QImode);
16891 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16892 }
16893
16894 /* Expand a comparison setting or clearing the carry flag. Return true when
16895 successful and set *POP for the operation. */
16896 static bool
16897 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16898 {
16899 enum machine_mode mode =
16900 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16901
16902 /* Do not handle double-mode compares that go through the special path. */
16903 if (mode == (TARGET_64BIT ? TImode : DImode))
16904 return false;
16905
16906 if (SCALAR_FLOAT_MODE_P (mode))
16907 {
16908 rtx compare_op, compare_seq;
16909
16910 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16911
16912 /* Shortcut: the following common codes never translate
16913 into carry flag compares. */
16914 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16915 || code == ORDERED || code == UNORDERED)
16916 return false;
16917
16918 /* These comparisons require the zero flag; swap operands so they won't. */
16919 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16920 && !TARGET_IEEE_FP)
16921 {
16922 rtx tmp = op0;
16923 op0 = op1;
16924 op1 = tmp;
16925 code = swap_condition (code);
16926 }
16927
16928 /* Try to expand the comparison and verify that we end up with
16929 a carry flag based comparison. This fails to be true only when
16930 we decide to expand the comparison using arithmetic, which is
16931 not a common scenario. */
16932 start_sequence ();
16933 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16934 compare_seq = get_insns ();
16935 end_sequence ();
16936
16937 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16938 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16939 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16940 else
16941 code = GET_CODE (compare_op);
16942
16943 if (code != LTU && code != GEU)
16944 return false;
16945
16946 emit_insn (compare_seq);
16947 *pop = compare_op;
16948 return true;
16949 }
16950
16951 if (!INTEGRAL_MODE_P (mode))
16952 return false;
16953
16954 switch (code)
16955 {
16956 case LTU:
16957 case GEU:
16958 break;
16959
16960 /* Convert a==0 into (unsigned)a<1. */
16961 case EQ:
16962 case NE:
16963 if (op1 != const0_rtx)
16964 return false;
16965 op1 = const1_rtx;
16966 code = (code == EQ ? LTU : GEU);
16967 break;
16968
16969 /* Convert a>b into b<a or a>=b+1. */
16970 case GTU:
16971 case LEU:
16972 if (CONST_INT_P (op1))
16973 {
16974 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
16975 /* Bail out on overflow. We still can swap operands but that
16976 would force loading the constant into a register. */
16977 if (op1 == const0_rtx
16978 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
16979 return false;
16980 code = (code == GTU ? GEU : LTU);
16981 }
16982 else
16983 {
16984 rtx tmp = op1;
16985 op1 = op0;
16986 op0 = tmp;
16987 code = (code == GTU ? LTU : GEU);
16988 }
16989 break;
16990
16991 /* Convert a>=0 into (unsigned)a<0x80000000. */
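/* Likewise a<0 becomes (unsigned)a>=0x80000000. */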
16992 case LT:
16993 case GE:
16994 if (mode == DImode || op1 != const0_rtx)
16995 return false;
16996 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16997 code = (code == LT ? GEU : LTU);
16998 break;
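/* Similarly, convert a<=-1 into (unsigned)a>=0x80000000 and a>-1
   into (unsigned)a<0x80000000. */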
16999 case LE:
17000 case GT:
17001 if (mode == DImode || op1 != constm1_rtx)
17002 return false;
17003 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17004 code = (code == LE ? GEU : LTU);
17005 break;
17006
17007 default:
17008 return false;
17009 }
17010 /* Swapping operands may cause a constant to appear as the first operand. */
17011 if (!nonimmediate_operand (op0, VOIDmode))
17012 {
17013 if (!can_create_pseudo_p ())
17014 return false;
17015 op0 = force_reg (mode, op0);
17016 }
17017 *pop = ix86_expand_compare (code, op0, op1);
17018 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
17019 return true;
17020 }
17021
17022 bool
17023 ix86_expand_int_movcc (rtx operands[])
17024 {
17025 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17026 rtx compare_seq, compare_op;
17027 enum machine_mode mode = GET_MODE (operands[0]);
17028 bool sign_bit_compare_p = false;
17029 rtx op0 = XEXP (operands[1], 0);
17030 rtx op1 = XEXP (operands[1], 1);
17031
17032 start_sequence ();
17033 compare_op = ix86_expand_compare (code, op0, op1);
17034 compare_seq = get_insns ();
17035 end_sequence ();
17036
17037 compare_code = GET_CODE (compare_op);
17038
17039 if ((op1 == const0_rtx && (code == GE || code == LT))
17040 || (op1 == constm1_rtx && (code == GT || code == LE)))
17041 sign_bit_compare_p = true;
17042
17043 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17044 HImode insns, we'd be swallowed in word prefix ops. */
17045
17046 if ((mode != HImode || TARGET_FAST_PREFIX)
17047 && (mode != (TARGET_64BIT ? TImode : DImode))
17048 && CONST_INT_P (operands[2])
17049 && CONST_INT_P (operands[3]))
17050 {
17051 rtx out = operands[0];
17052 HOST_WIDE_INT ct = INTVAL (operands[2]);
17053 HOST_WIDE_INT cf = INTVAL (operands[3]);
17054 HOST_WIDE_INT diff;
17055
17056 diff = ct - cf;
17057 /* Sign bit compares are better done using shifts than by using
17058 sbb. */
17059 if (sign_bit_compare_p
17060 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17061 {
17062 /* Detect overlap between destination and compare sources. */
17063 rtx tmp = out;
17064
17065 if (!sign_bit_compare_p)
17066 {
17067 rtx flags;
17068 bool fpcmp = false;
17069
17070 compare_code = GET_CODE (compare_op);
17071
17072 flags = XEXP (compare_op, 0);
17073
17074 if (GET_MODE (flags) == CCFPmode
17075 || GET_MODE (flags) == CCFPUmode)
17076 {
17077 fpcmp = true;
17078 compare_code
17079 = ix86_fp_compare_code_to_integer (compare_code);
17080 }
17081
17082 /* To simplify the rest of the code, restrict to the GEU case. */
17083 if (compare_code == LTU)
17084 {
17085 HOST_WIDE_INT tmp = ct;
17086 ct = cf;
17087 cf = tmp;
17088 compare_code = reverse_condition (compare_code);
17089 code = reverse_condition (code);
17090 }
17091 else
17092 {
17093 if (fpcmp)
17094 PUT_CODE (compare_op,
17095 reverse_condition_maybe_unordered
17096 (GET_CODE (compare_op)));
17097 else
17098 PUT_CODE (compare_op,
17099 reverse_condition (GET_CODE (compare_op)));
17100 }
17101 diff = ct - cf;
17102
17103 if (reg_overlap_mentioned_p (out, op0)
17104 || reg_overlap_mentioned_p (out, op1))
17105 tmp = gen_reg_rtx (mode);
17106
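/* The movdicc_0_m1/movsicc_0_m1 patterns expand to "sbb dest,dest",
   leaving dest equal to -1 when the carry flag is set and 0 otherwise. */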
17107 if (mode == DImode)
17108 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17109 else
17110 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17111 flags, compare_op));
17112 }
17113 else
17114 {
17115 if (code == GT || code == GE)
17116 code = reverse_condition (code);
17117 else
17118 {
17119 HOST_WIDE_INT tmp = ct;
17120 ct = cf;
17121 cf = tmp;
17122 diff = ct - cf;
17123 }
17124 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17125 }
17126
17127 if (diff == 1)
17128 {
17129 /*
17130 * cmpl op0,op1
17131 * sbbl dest,dest
17132 * [addl dest, ct]
17133 *
17134 * Size 5 - 8.
17135 */
17136 if (ct)
17137 tmp = expand_simple_binop (mode, PLUS,
17138 tmp, GEN_INT (ct),
17139 copy_rtx (tmp), 1, OPTAB_DIRECT);
17140 }
17141 else if (cf == -1)
17142 {
17143 /*
17144 * cmpl op0,op1
17145 * sbbl dest,dest
17146 * orl $ct, dest
17147 *
17148 * Size 8.
17149 */
17150 tmp = expand_simple_binop (mode, IOR,
17151 tmp, GEN_INT (ct),
17152 copy_rtx (tmp), 1, OPTAB_DIRECT);
17153 }
17154 else if (diff == -1 && ct)
17155 {
17156 /*
17157 * cmpl op0,op1
17158 * sbbl dest,dest
17159 * notl dest
17160 * [addl dest, cf]
17161 *
17162 * Size 8 - 11.
17163 */
17164 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17165 if (cf)
17166 tmp = expand_simple_binop (mode, PLUS,
17167 copy_rtx (tmp), GEN_INT (cf),
17168 copy_rtx (tmp), 1, OPTAB_DIRECT);
17169 }
17170 else
17171 {
17172 /*
17173 * cmpl op0,op1
17174 * sbbl dest,dest
17175 * [notl dest]
17176 * andl cf - ct, dest
17177 * [addl dest, ct]
17178 *
17179 * Size 8 - 11.
17180 */
17181
17182 if (cf == 0)
17183 {
17184 cf = ct;
17185 ct = 0;
17186 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17187 }
17188
17189 tmp = expand_simple_binop (mode, AND,
17190 copy_rtx (tmp),
17191 gen_int_mode (cf - ct, mode),
17192 copy_rtx (tmp), 1, OPTAB_DIRECT);
17193 if (ct)
17194 tmp = expand_simple_binop (mode, PLUS,
17195 copy_rtx (tmp), GEN_INT (ct),
17196 copy_rtx (tmp), 1, OPTAB_DIRECT);
17197 }
17198
17199 if (!rtx_equal_p (tmp, out))
17200 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
17201
17202 return true;
17203 }
17204
17205 if (diff < 0)
17206 {
17207 enum machine_mode cmp_mode = GET_MODE (op0);
17208
17209 HOST_WIDE_INT tmp;
17210 tmp = ct, ct = cf, cf = tmp;
17211 diff = -diff;
17212
17213 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17214 {
17215 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17216
17217 /* We may be reversing an unordered compare to a normal compare, which
17218 is not valid in general (we may convert a non-trapping condition
17219 to a trapping one); however, on i386 we currently emit all
17220 comparisons unordered. */
17221 compare_code = reverse_condition_maybe_unordered (compare_code);
17222 code = reverse_condition_maybe_unordered (code);
17223 }
17224 else
17225 {
17226 compare_code = reverse_condition (compare_code);
17227 code = reverse_condition (code);
17228 }
17229 }
17230
17231 compare_code = UNKNOWN;
17232 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17233 && CONST_INT_P (op1))
17234 {
17235 if (op1 == const0_rtx
17236 && (code == LT || code == GE))
17237 compare_code = code;
17238 else if (op1 == constm1_rtx)
17239 {
17240 if (code == LE)
17241 compare_code = LT;
17242 else if (code == GT)
17243 compare_code = GE;
17244 }
17245 }
17246
17247 /* Optimize dest = (op0 < 0) ? -1 : cf. */
17248 if (compare_code != UNKNOWN
17249 && GET_MODE (op0) == GET_MODE (out)
17250 && (cf == -1 || ct == -1))
17251 {
17252 /* If the lea code below could be used, only optimize
17253 if it results in a 2-insn sequence. */
17254
17255 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
17256 || diff == 3 || diff == 5 || diff == 9)
17257 || (compare_code == LT && ct == -1)
17258 || (compare_code == GE && cf == -1))
17259 {
17260 /*
17261 * notl op1 (if necessary)
17262 * sarl $31, op1
17263 * orl cf, op1
17264 */
17265 if (ct != -1)
17266 {
17267 cf = ct;
17268 ct = -1;
17269 code = reverse_condition (code);
17270 }
17271
17272 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17273
17274 out = expand_simple_binop (mode, IOR,
17275 out, GEN_INT (cf),
17276 out, 1, OPTAB_DIRECT);
17277 if (out != operands[0])
17278 emit_move_insn (operands[0], out);
17279
17280 return true;
17281 }
17282 }
17283
17284
17285 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
17286 || diff == 3 || diff == 5 || diff == 9)
17287 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
17288 && (mode != DImode
17289 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
17290 {
17291 /*
17292 * xorl dest,dest
17293 * cmpl op1,op2
17294 * setcc dest
17295 * lea cf(dest*(ct-cf)),dest
17296 *
17297 * Size 14.
17298 *
17299 * This also catches the degenerate setcc-only case.
17300 */
17301
17302 rtx tmp;
17303 int nops;
17304
17305 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17306
17307 nops = 0;
17308 /* On x86_64 the lea instruction operates on Pmode, so we need
17309 to get the arithmetic done in the proper mode to match. */
17310 if (diff == 1)
17311 tmp = copy_rtx (out);
17312 else
17313 {
17314 rtx out1;
17315 out1 = copy_rtx (out);
17316 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
17317 nops++;
17318 if (diff & 1)
17319 {
17320 tmp = gen_rtx_PLUS (mode, tmp, out1);
17321 nops++;
17322 }
17323 }
17324 if (cf != 0)
17325 {
17326 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
17327 nops++;
17328 }
17329 if (!rtx_equal_p (tmp, out))
17330 {
17331 if (nops == 1)
17332 out = force_operand (tmp, copy_rtx (out));
17333 else
17334 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
17335 }
17336 if (!rtx_equal_p (out, operands[0]))
17337 emit_move_insn (operands[0], copy_rtx (out));
17338
17339 return true;
17340 }
17341
17342 /*
17343 * General case: Jumpful:
17344 * xorl dest,dest cmpl op1, op2
17345 * cmpl op1, op2 movl ct, dest
17346 * setcc dest jcc 1f
17347 * decl dest movl cf, dest
17348 * andl (cf-ct),dest 1:
17349 * addl ct,dest
17350 *
17351 * Size 20. Size 14.
17352 *
17353 * This is reasonably steep, but branch mispredict costs are
17354 * high on modern CPUs, so consider failing only if optimizing
17355 * for space.
17356 */
17357
17358 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17359 && BRANCH_COST (optimize_insn_for_speed_p (),
17360 false) >= 2)
17361 {
17362 if (cf == 0)
17363 {
17364 enum machine_mode cmp_mode = GET_MODE (op0);
17365
17366 cf = ct;
17367 ct = 0;
17368
17369 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17370 {
17371 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17372
17373 /* We may be reversing an unordered compare to a normal compare,
17374 which is not valid in general (we may convert a non-trapping
17375 condition to a trapping one); however, on i386 we currently
17376 emit all comparisons unordered. */
17377 code = reverse_condition_maybe_unordered (code);
17378 }
17379 else
17380 {
17381 code = reverse_condition (code);
17382 if (compare_code != UNKNOWN)
17383 compare_code = reverse_condition (compare_code);
17384 }
17385 }
17386
17387 if (compare_code != UNKNOWN)
17388 {
17389 /* notl op1 (if needed)
17390 sarl $31, op1
17391 andl (cf-ct), op1
17392 addl ct, op1
17393
17394 For x < 0 (resp. x <= -1) there will be no notl,
17395 so if possible swap the constants to get rid of the
17396 complement.
17397 True/false will be -1/0 while code below (store flag
17398 followed by decrement) is 0/-1, so the constants need
17399 to be exchanged once more. */
17400
17401 if (compare_code == GE || !cf)
17402 {
17403 code = reverse_condition (code);
17404 compare_code = LT;
17405 }
17406 else
17407 {
17408 HOST_WIDE_INT tmp = cf;
17409 cf = ct;
17410 ct = tmp;
17411 }
17412
17413 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17414 }
17415 else
17416 {
17417 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17418
17419 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
17420 constm1_rtx,
17421 copy_rtx (out), 1, OPTAB_DIRECT);
17422 }
17423
17424 out = expand_simple_binop (mode, AND, copy_rtx (out),
17425 gen_int_mode (cf - ct, mode),
17426 copy_rtx (out), 1, OPTAB_DIRECT);
17427 if (ct)
17428 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
17429 copy_rtx (out), 1, OPTAB_DIRECT);
17430 if (!rtx_equal_p (out, operands[0]))
17431 emit_move_insn (operands[0], copy_rtx (out));
17432
17433 return true;
17434 }
17435 }
17436
17437 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17438 {
17439 /* Try a few things more with specific constants and a variable. */
17440
17441 optab op;
17442 rtx var, orig_out, out, tmp;
17443
17444 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
17445 return false;
17446
17447 /* If one of the two operands is an interesting constant, use the code
17448 above to load a 0/-1 value and mask the variable in with a logical operation. */
17449
17450 if (CONST_INT_P (operands[2]))
17451 {
17452 var = operands[3];
17453 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
17454 operands[3] = constm1_rtx, op = and_optab;
17455 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
17456 operands[3] = const0_rtx, op = ior_optab;
17457 else
17458 return false;
17459 }
17460 else if (CONST_INT_P (operands[3]))
17461 {
17462 var = operands[2];
17463 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
17464 operands[2] = constm1_rtx, op = and_optab;
17465 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
17466 operands[2] = const0_rtx, op = ior_optab;
17467 else
17468 return false;
17469 }
17470 else
17471 return false;
17472
17473 orig_out = operands[0];
17474 tmp = gen_reg_rtx (mode);
17475 operands[0] = tmp;
17476
17477 /* Recurse to get the constant loaded. */
17478 if (ix86_expand_int_movcc (operands) == 0)
17479 return false;
17480
17481 /* Mask in the interesting variable. */
17482 out = expand_binop (mode, op, var, tmp, orig_out, 0,
17483 OPTAB_WIDEN);
17484 if (!rtx_equal_p (out, orig_out))
17485 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
17486
17487 return true;
17488 }
17489
17490 /*
17491 * For comparison with above,
17492 *
17493 * movl cf,dest
17494 * movl ct,tmp
17495 * cmpl op1,op2
17496 * cmovcc tmp,dest
17497 *
17498 * Size 15.
17499 */
17500
17501 if (! nonimmediate_operand (operands[2], mode))
17502 operands[2] = force_reg (mode, operands[2]);
17503 if (! nonimmediate_operand (operands[3], mode))
17504 operands[3] = force_reg (mode, operands[3]);
17505
17506 if (! register_operand (operands[2], VOIDmode)
17507 && (mode == QImode
17508 || ! register_operand (operands[3], VOIDmode)))
17509 operands[2] = force_reg (mode, operands[2]);
17510
17511 if (mode == QImode
17512 && ! register_operand (operands[3], VOIDmode))
17513 operands[3] = force_reg (mode, operands[3]);
17514
17515 emit_insn (compare_seq);
17516 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17517 gen_rtx_IF_THEN_ELSE (mode,
17518 compare_op, operands[2],
17519 operands[3])));
17520 return true;
17521 }
17522
17523 /* Swap, force into registers, or otherwise massage the two operands
17524 to an sse comparison with a mask result. Thus we differ a bit from
17525 ix86_prepare_fp_compare_args which expects to produce a flags result.
17526
17527 The DEST operand exists to help determine whether to commute commutative
17528 operators. The POP0/POP1 operands are updated in place. The new
17529 comparison code is returned, or UNKNOWN if not implementable. */
17530
17531 static enum rtx_code
17532 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
17533 rtx *pop0, rtx *pop1)
17534 {
17535 rtx tmp;
17536
17537 switch (code)
17538 {
17539 case LTGT:
17540 case UNEQ:
17541 /* We have no LTGT as an operator. We could implement it with
17542 NE & ORDERED, but this requires an extra temporary. It's
17543 not clear that it's worth it. */
17544 return UNKNOWN;
17545
17546 case LT:
17547 case LE:
17548 case UNGT:
17549 case UNGE:
17550 /* These are supported directly. */
17551 break;
17552
17553 case EQ:
17554 case NE:
17555 case UNORDERED:
17556 case ORDERED:
17557 /* For commutative operators, try to canonicalize the destination
17558 operand to be first in the comparison - this helps reload to
17559 avoid extra moves. */
17560 if (!dest || !rtx_equal_p (dest, *pop1))
17561 break;
17562 /* FALLTHRU */
17563
17564 case GE:
17565 case GT:
17566 case UNLE:
17567 case UNLT:
17568 /* These are not supported directly. Swap the comparison operands
17569 to transform into something that is supported. */
17570 tmp = *pop0;
17571 *pop0 = *pop1;
17572 *pop1 = tmp;
17573 code = swap_condition (code);
17574 break;
17575
17576 default:
17577 gcc_unreachable ();
17578 }
17579
17580 return code;
17581 }
17582
17583 /* Detect conditional moves that exactly match min/max operational
17584 semantics. Note that this is IEEE safe, as long as we don't
17585 interchange the operands.
17586
17587 Returns FALSE if this conditional move doesn't match a MIN/MAX,
17588 and TRUE if the operation is successful and instructions are emitted. */
17589
17590 static bool
17591 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
17592 rtx cmp_op1, rtx if_true, rtx if_false)
17593 {
17594 enum machine_mode mode;
17595 bool is_min;
17596 rtx tmp;
17597
17598 if (code == LT)
17599 ;
17600 else if (code == UNGE)
17601 {
17602 tmp = if_true;
17603 if_true = if_false;
17604 if_false = tmp;
17605 }
17606 else
17607 return false;
17608
17609 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
17610 is_min = true;
17611 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
17612 is_min = false;
17613 else
17614 return false;
17615
17616 mode = GET_MODE (dest);
17617
17618 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
17619 but MODE may be a vector mode and thus not appropriate. */
17620 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
17621 {
17622 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
17623 rtvec v;
17624
17625 if_true = force_reg (mode, if_true);
17626 v = gen_rtvec (2, if_true, if_false);
17627 tmp = gen_rtx_UNSPEC (mode, v, u);
17628 }
17629 else
17630 {
17631 code = is_min ? SMIN : SMAX;
17632 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
17633 }
17634
17635 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
17636 return true;
17637 }
17638
17639 /* Expand an sse vector comparison. Return the register with the result. */
17640
17641 static rtx
17642 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
17643 rtx op_true, rtx op_false)
17644 {
17645 enum machine_mode mode = GET_MODE (dest);
17646 rtx x;
17647
17648 cmp_op0 = force_reg (mode, cmp_op0);
17649 if (!nonimmediate_operand (cmp_op1, mode))
17650 cmp_op1 = force_reg (mode, cmp_op1);
17651
17652 if (optimize
17653 || reg_overlap_mentioned_p (dest, op_true)
17654 || reg_overlap_mentioned_p (dest, op_false))
17655 dest = gen_reg_rtx (mode);
17656
17657 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
17658 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17659
17660 return dest;
17661 }
17662
17663 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
17664 operations. This is used for both scalar and vector conditional moves. */
17665
17666 static void
17667 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
17668 {
17669 enum machine_mode mode = GET_MODE (dest);
17670 rtx t2, t3, x;
17671
17672 if (op_false == CONST0_RTX (mode))
17673 {
17674 op_true = force_reg (mode, op_true);
17675 x = gen_rtx_AND (mode, cmp, op_true);
17676 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17677 }
17678 else if (op_true == CONST0_RTX (mode))
17679 {
17680 op_false = force_reg (mode, op_false);
17681 x = gen_rtx_NOT (mode, cmp);
17682 x = gen_rtx_AND (mode, x, op_false);
17683 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17684 }
17685 else if (TARGET_XOP)
17686 {
17687 rtx pcmov = gen_rtx_SET (mode, dest,
17688 gen_rtx_IF_THEN_ELSE (mode, cmp,
17689 op_true,
17690 op_false));
17691 emit_insn (pcmov);
17692 }
17693 else
17694 {
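/* General case: compute dest = (cmp & op_true) | (~cmp & op_false)
   with three logical operations through temporaries. */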
17695 op_true = force_reg (mode, op_true);
17696 op_false = force_reg (mode, op_false);
17697
17698 t2 = gen_reg_rtx (mode);
17699 if (optimize)
17700 t3 = gen_reg_rtx (mode);
17701 else
17702 t3 = dest;
17703
17704 x = gen_rtx_AND (mode, op_true, cmp);
17705 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
17706
17707 x = gen_rtx_NOT (mode, cmp);
17708 x = gen_rtx_AND (mode, x, op_false);
17709 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
17710
17711 x = gen_rtx_IOR (mode, t3, t2);
17712 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17713 }
17714 }
17715
17716 /* Expand a floating-point conditional move. Return true if successful. */
17717
17718 bool
17719 ix86_expand_fp_movcc (rtx operands[])
17720 {
17721 enum machine_mode mode = GET_MODE (operands[0]);
17722 enum rtx_code code = GET_CODE (operands[1]);
17723 rtx tmp, compare_op;
17724 rtx op0 = XEXP (operands[1], 0);
17725 rtx op1 = XEXP (operands[1], 1);
17726
17727 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17728 {
17729 enum machine_mode cmode;
17730
17731 /* Since we have no cmove for sse registers, don't force bad register
17732 allocation just to gain access to it. Deny movcc when the
17733 comparison mode doesn't match the move mode. */
17734 cmode = GET_MODE (op0);
17735 if (cmode == VOIDmode)
17736 cmode = GET_MODE (op1);
17737 if (cmode != mode)
17738 return false;
17739
17740 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
17741 if (code == UNKNOWN)
17742 return false;
17743
17744 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
17745 operands[2], operands[3]))
17746 return true;
17747
17748 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
17749 operands[2], operands[3]);
17750 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
17751 return true;
17752 }
17753
17754 /* The floating point conditional move instructions don't directly
17755 support conditions resulting from a signed integer comparison. */
17756
17757 compare_op = ix86_expand_compare (code, op0, op1);
17758 if (!fcmov_comparison_operator (compare_op, VOIDmode))
17759 {
17760 tmp = gen_reg_rtx (QImode);
17761 ix86_expand_setcc (tmp, code, op0, op1);
17762
17763 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
17764 }
17765
17766 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17767 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17768 operands[2], operands[3])));
17769
17770 return true;
17771 }
17772
17773 /* Expand a floating-point vector conditional move; a vcond operation
17774 rather than a movcc operation. */
17775
17776 bool
17777 ix86_expand_fp_vcond (rtx operands[])
17778 {
17779 enum rtx_code code = GET_CODE (operands[3]);
17780 rtx cmp;
17781
17782 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17783 &operands[4], &operands[5]);
17784 if (code == UNKNOWN)
17785 return false;
17786
17787 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17788 operands[5], operands[1], operands[2]))
17789 return true;
17790
17791 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17792 operands[1], operands[2]);
17793 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17794 return true;
17795 }
17796
17797 /* Expand a signed/unsigned integral vector conditional move. */
17798
17799 bool
17800 ix86_expand_int_vcond (rtx operands[])
17801 {
17802 enum machine_mode mode = GET_MODE (operands[0]);
17803 enum rtx_code code = GET_CODE (operands[3]);
17804 bool negate = false;
17805 rtx x, cop0, cop1;
17806
17807 cop0 = operands[4];
17808 cop1 = operands[5];
17809
17810 /* XOP supports all of the comparisons on all vector int types. */
17811 if (!TARGET_XOP)
17812 {
17813 /* Canonicalize the comparison to EQ, GT, GTU. */
17814 switch (code)
17815 {
17816 case EQ:
17817 case GT:
17818 case GTU:
17819 break;
17820
17821 case NE:
17822 case LE:
17823 case LEU:
17824 code = reverse_condition (code);
17825 negate = true;
17826 break;
17827
17828 case GE:
17829 case GEU:
17830 code = reverse_condition (code);
17831 negate = true;
17832 /* FALLTHRU */
17833
17834 case LT:
17835 case LTU:
17836 code = swap_condition (code);
17837 x = cop0, cop0 = cop1, cop1 = x;
17838 break;
17839
17840 default:
17841 gcc_unreachable ();
17842 }
17843
17844 /* Only SSE4.1/SSE4.2 supports V2DImode. */
17845 if (mode == V2DImode)
17846 {
17847 switch (code)
17848 {
17849 case EQ:
17850 /* SSE4.1 supports EQ. */
17851 if (!TARGET_SSE4_1)
17852 return false;
17853 break;
17854
17855 case GT:
17856 case GTU:
17857 /* SSE4.2 supports GT/GTU. */
17858 if (!TARGET_SSE4_2)
17859 return false;
17860 break;
17861
17862 default:
17863 gcc_unreachable ();
17864 }
17865 }
17866
17867 /* Unsigned parallel compare is not supported by the hardware.
17868 Play some tricks to turn this into a signed comparison
17869 or a comparison against zero. */
17870 if (code == GTU)
17871 {
17872 cop0 = force_reg (mode, cop0);
17873
17874 switch (mode)
17875 {
17876 case V4SImode:
17877 case V2DImode:
17878 {
17879 rtx t1, t2, mask;
17880 rtx (*gen_sub3) (rtx, rtx, rtx);
17881
17882 /* Subtract (-(INT MAX) - 1) from both operands to make
17883 them signed. */
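/* Subtracting INT_MIN (equivalently, flipping the sign bit modulo 2^n)
   maps unsigned order onto signed order, so GTU becomes GT. */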
17884 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
17885 true, false);
17886 gen_sub3 = (mode == V4SImode
17887 ? gen_subv4si3 : gen_subv2di3);
17888 t1 = gen_reg_rtx (mode);
17889 emit_insn (gen_sub3 (t1, cop0, mask));
17890
17891 t2 = gen_reg_rtx (mode);
17892 emit_insn (gen_sub3 (t2, cop1, mask));
17893
17894 cop0 = t1;
17895 cop1 = t2;
17896 code = GT;
17897 }
17898 break;
17899
17900 case V16QImode:
17901 case V8HImode:
17902 /* Perform a parallel unsigned saturating subtraction. */
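/* a >u b iff the saturating subtraction (a -us b) is nonzero, so
   compare the result against zero and flip NEGATE. */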
17903 x = gen_reg_rtx (mode);
17904 emit_insn (gen_rtx_SET (VOIDmode, x,
17905 gen_rtx_US_MINUS (mode, cop0, cop1)));
17906
17907 cop0 = x;
17908 cop1 = CONST0_RTX (mode);
17909 code = EQ;
17910 negate = !negate;
17911 break;
17912
17913 default:
17914 gcc_unreachable ();
17915 }
17916 }
17917 }
17918
17919 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17920 operands[1+negate], operands[2-negate]);
17921
17922 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17923 operands[2-negate]);
17924 return true;
17925 }
17926
17927 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17928 true if we should do zero extension, else sign extension. HIGH_P is
17929 true if we want the N/2 high elements, else the low elements. */
17930
17931 void
17932 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17933 {
17934 enum machine_mode imode = GET_MODE (operands[1]);
17935 rtx (*unpack)(rtx, rtx, rtx);
17936 rtx se, dest;
17937
17938 switch (imode)
17939 {
17940 case V16QImode:
17941 if (high_p)
17942 unpack = gen_vec_interleave_highv16qi;
17943 else
17944 unpack = gen_vec_interleave_lowv16qi;
17945 break;
17946 case V8HImode:
17947 if (high_p)
17948 unpack = gen_vec_interleave_highv8hi;
17949 else
17950 unpack = gen_vec_interleave_lowv8hi;
17951 break;
17952 case V4SImode:
17953 if (high_p)
17954 unpack = gen_vec_interleave_highv4si;
17955 else
17956 unpack = gen_vec_interleave_lowv4si;
17957 break;
17958 default:
17959 gcc_unreachable ();
17960 }
17961
17962 dest = gen_lowpart (imode, operands[0]);
17963
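/* The interleave's second operand supplies the new high halves: zeros
   for zero extension, or a mask of the elements' sign bits (0 > x)
   for sign extension. */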
17964 if (unsigned_p)
17965 se = force_reg (imode, CONST0_RTX (imode));
17966 else
17967 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
17968 operands[1], pc_rtx, pc_rtx);
17969
17970 emit_insn (unpack (dest, operands[1], se));
17971 }
17972
17973 /* This function performs the same task as ix86_expand_sse_unpack,
17974 but with SSE4.1 instructions. */
17975
17976 void
17977 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17978 {
17979 enum machine_mode imode = GET_MODE (operands[1]);
17980 rtx (*unpack)(rtx, rtx);
17981 rtx src, dest;
17982
17983 switch (imode)
17984 {
17985 case V16QImode:
17986 if (unsigned_p)
17987 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
17988 else
17989 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
17990 break;
17991 case V8HImode:
17992 if (unsigned_p)
17993 unpack = gen_sse4_1_zero_extendv4hiv4si2;
17994 else
17995 unpack = gen_sse4_1_sign_extendv4hiv4si2;
17996 break;
17997 case V4SImode:
17998 if (unsigned_p)
17999 unpack = gen_sse4_1_zero_extendv2siv2di2;
18000 else
18001 unpack = gen_sse4_1_sign_extendv2siv2di2;
18002 break;
18003 default:
18004 gcc_unreachable ();
18005 }
18006
18007 dest = operands[0];
18008 if (high_p)
18009 {
18010 /* Shift higher 8 bytes to lower 8 bytes. */
18011 src = gen_reg_rtx (imode);
18012 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
18013 gen_lowpart (V1TImode, operands[1]),
18014 GEN_INT (64)));
18015 }
18016 else
18017 src = operands[1];
18018
18019 emit_insn (unpack (dest, src));
18020 }
18021
18022 /* Expand conditional increment or decrement using adc/sbb instructions.
18023 The default case using setcc followed by the conditional move can be
18024 done by generic code. */
18025 bool
18026 ix86_expand_int_addcc (rtx operands[])
18027 {
18028 enum rtx_code code = GET_CODE (operands[1]);
18029 rtx flags;
18030 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18031 rtx compare_op;
18032 rtx val = const0_rtx;
18033 bool fpcmp = false;
18034 enum machine_mode mode;
18035 rtx op0 = XEXP (operands[1], 0);
18036 rtx op1 = XEXP (operands[1], 1);
18037
18038 if (operands[3] != const1_rtx
18039 && operands[3] != constm1_rtx)
18040 return false;
18041 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18042 return false;
18043 code = GET_CODE (compare_op);
18044
18045 flags = XEXP (compare_op, 0);
18046
18047 if (GET_MODE (flags) == CCFPmode
18048 || GET_MODE (flags) == CCFPUmode)
18049 {
18050 fpcmp = true;
18051 code = ix86_fp_compare_code_to_integer (code);
18052 }
18053
18054 if (code != LTU)
18055 {
18056 val = constm1_rtx;
18057 if (fpcmp)
18058 PUT_CODE (compare_op,
18059 reverse_condition_maybe_unordered
18060 (GET_CODE (compare_op)));
18061 else
18062 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18063 }
18064
18065 mode = GET_MODE (operands[0]);
18066
18067 /* Construct either adc or sbb insn. */
18068 if ((code == LTU) == (operands[3] == constm1_rtx))
18069 {
18070 switch (mode)
18071 {
18072 case QImode:
18073 insn = gen_subqi3_carry;
18074 break;
18075 case HImode:
18076 insn = gen_subhi3_carry;
18077 break;
18078 case SImode:
18079 insn = gen_subsi3_carry;
18080 break;
18081 case DImode:
18082 insn = gen_subdi3_carry;
18083 break;
18084 default:
18085 gcc_unreachable ();
18086 }
18087 }
18088 else
18089 {
18090 switch (mode)
18091 {
18092 case QImode:
18093 insn = gen_addqi3_carry;
18094 break;
18095 case HImode:
18096 insn = gen_addhi3_carry;
18097 break;
18098 case SImode:
18099 insn = gen_addsi3_carry;
18100 break;
18101 case DImode:
18102 insn = gen_adddi3_carry;
18103 break;
18104 default:
18105 gcc_unreachable ();
18106 }
18107 }
18108 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
18109
18110 return true;
18111 }
18112
18113
18114 /* Split OPERAND into half-mode parts. Similar to split_double_mode,
18115 but works for floating point parameters and non-offsettable memories.
18116 For pushes, it returns just stack offsets; the values will be saved
18117 in the right order. At most four parts are generated. */
18118
18119 static int
18120 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18121 {
18122 int size;
18123
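/* On 32-bit targets XFmode is split into three SImode parts and other
   modes into 4-byte parts; on 64-bit targets values are split into
   8-byte parts. */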
18124 if (!TARGET_64BIT)
18125 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18126 else
18127 size = (GET_MODE_SIZE (mode) + 4) / 8;
18128
18129 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18130 gcc_assert (size >= 2 && size <= 4);
18131
18132 /* Optimize constant pool references to immediates. This is used by fp
18133 moves, which force all constants to memory to allow combining. */
18134 if (MEM_P (operand) && MEM_READONLY_P (operand))
18135 {
18136 rtx tmp = maybe_get_pool_constant (operand);
18137 if (tmp)
18138 operand = tmp;
18139 }
18140
18141 if (MEM_P (operand) && !offsettable_memref_p (operand))
18142 {
18143 /* The only non-offsettable memories we handle are pushes. */
18144 int ok = push_operand (operand, VOIDmode);
18145
18146 gcc_assert (ok);
18147
18148 operand = copy_rtx (operand);
18149 PUT_MODE (operand, Pmode);
18150 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18151 return size;
18152 }
18153
18154 if (GET_CODE (operand) == CONST_VECTOR)
18155 {
18156 enum machine_mode imode = int_mode_for_mode (mode);
18157 /* Caution: if we looked through a constant pool memory above,
18158 the operand may actually have a different mode now. That's
18159 ok, since we want to pun this all the way back to an integer. */
18160 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18161 gcc_assert (operand != NULL);
18162 mode = imode;
18163 }
18164
18165 if (!TARGET_64BIT)
18166 {
18167 if (mode == DImode)
18168 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18169 else
18170 {
18171 int i;
18172
18173 if (REG_P (operand))
18174 {
18175 gcc_assert (reload_completed);
18176 for (i = 0; i < size; i++)
18177 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18178 }
18179 else if (offsettable_memref_p (operand))
18180 {
18181 operand = adjust_address (operand, SImode, 0);
18182 parts[0] = operand;
18183 for (i = 1; i < size; i++)
18184 parts[i] = adjust_address (operand, SImode, 4 * i);
18185 }
18186 else if (GET_CODE (operand) == CONST_DOUBLE)
18187 {
18188 REAL_VALUE_TYPE r;
18189 long l[4];
18190
18191 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18192 switch (mode)
18193 {
18194 case TFmode:
18195 real_to_target (l, &r, mode);
18196 parts[3] = gen_int_mode (l[3], SImode);
18197 parts[2] = gen_int_mode (l[2], SImode);
18198 break;
18199 case XFmode:
18200 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18201 parts[2] = gen_int_mode (l[2], SImode);
18202 break;
18203 case DFmode:
18204 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18205 break;
18206 default:
18207 gcc_unreachable ();
18208 }
18209 parts[1] = gen_int_mode (l[1], SImode);
18210 parts[0] = gen_int_mode (l[0], SImode);
18211 }
18212 else
18213 gcc_unreachable ();
18214 }
18215 }
18216 else
18217 {
18218 if (mode == TImode)
18219 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18220 if (mode == XFmode || mode == TFmode)
18221 {
18222 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
18223 if (REG_P (operand))
18224 {
18225 gcc_assert (reload_completed);
18226 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
18227 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
18228 }
18229 else if (offsettable_memref_p (operand))
18230 {
18231 operand = adjust_address (operand, DImode, 0);
18232 parts[0] = operand;
18233 parts[1] = adjust_address (operand, upper_mode, 8);
18234 }
18235 else if (GET_CODE (operand) == CONST_DOUBLE)
18236 {
18237 REAL_VALUE_TYPE r;
18238 long l[4];
18239
18240 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18241 real_to_target (l, &r, mode);
18242
18243 /* Do not use shift by 32 to avoid warning on 32bit systems. */
18244 if (HOST_BITS_PER_WIDE_INT >= 64)
18245 parts[0]
18246 = gen_int_mode
18247 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
18248 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
18249 DImode);
18250 else
18251 parts[0] = immed_double_const (l[0], l[1], DImode);
18252
18253 if (upper_mode == SImode)
18254 parts[1] = gen_int_mode (l[2], SImode);
18255 else if (HOST_BITS_PER_WIDE_INT >= 64)
18256 parts[1]
18257 = gen_int_mode
18258 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
18259 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
18260 DImode);
18261 else
18262 parts[1] = immed_double_const (l[2], l[3], DImode);
18263 }
18264 else
18265 gcc_unreachable ();
18266 }
18267 }
18268
18269 return size;
18270 }
18271
18272 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
18273 Operand 0 is the destination and operand 1 the source; both are split
18274 into half-mode parts and the parts are moved or pushed in the
18275 correct order. */
18276
18277 void
18278 ix86_split_long_move (rtx operands[])
18279 {
18280 rtx part[2][4];
18281 int nparts, i, j;
18282 int push = 0;
18283 int collisions = 0;
18284 enum machine_mode mode = GET_MODE (operands[0]);
18285 bool collisionparts[4];
18286
18287 /* The DFmode expanders may ask us to move a double.
18288 For a 64-bit target this is a single move. By hiding the fact
18289 here we simplify the i386.md splitters. */
18290 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
18291 {
18292 /* Optimize constant pool references to immediates. This is used by
18293 fp moves, which force all constants to memory to allow combining. */
18294
18295 if (MEM_P (operands[1])
18296 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
18297 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
18298 operands[1] = get_pool_constant (XEXP (operands[1], 0));
18299 if (push_operand (operands[0], VOIDmode))
18300 {
18301 operands[0] = copy_rtx (operands[0]);
18302 PUT_MODE (operands[0], Pmode);
18303 }
18304 else
18305 operands[0] = gen_lowpart (DImode, operands[0]);
18306 operands[1] = gen_lowpart (DImode, operands[1]);
18307 emit_move_insn (operands[0], operands[1]);
18308 return;
18309 }
18310
18311 /* The only non-offsettable memory we handle is push. */
18312 if (push_operand (operands[0], VOIDmode))
18313 push = 1;
18314 else
18315 gcc_assert (!MEM_P (operands[0])
18316 || offsettable_memref_p (operands[0]));
18317
18318 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
18319 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
18320
18321 /* When emitting a push, take care of source operands on the stack. */
18322 if (push && MEM_P (operands[1])
18323 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
18324 {
18325 rtx src_base = XEXP (part[1][nparts - 1], 0);
18326
18327 /* Compensate for the stack decrement by 4. */
18328 if (!TARGET_64BIT && nparts == 3
18329 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
18330 src_base = plus_constant (src_base, 4);
18331
18332 /* src_base refers to the stack pointer and is
18333 automatically decreased by emitted push. */
18334 for (i = 0; i < nparts; i++)
18335 part[1][i] = change_address (part[1][i],
18336 GET_MODE (part[1][i]), src_base);
18337 }
18338
18339 /* We need to do the copy in the right order in case an address register
18340 of the source overlaps the destination. */
18341 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
18342 {
18343 rtx tmp;
18344
18345 for (i = 0; i < nparts; i++)
18346 {
18347 collisionparts[i]
18348 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
18349 if (collisionparts[i])
18350 collisions++;
18351 }
18352
18353 /* Collision in the middle part can be handled by reordering. */
18354 if (collisions == 1 && nparts == 3 && collisionparts [1])
18355 {
18356 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18357 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18358 }
18359 else if (collisions == 1
18360 && nparts == 4
18361 && (collisionparts [1] || collisionparts [2]))
18362 {
18363 if (collisionparts [1])
18364 {
18365 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18366 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18367 }
18368 else
18369 {
18370 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
18371 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
18372 }
18373 }
18374
18375 /* If there are more collisions, we can't handle it by reordering.
18376 Do an lea to the last part and use only one colliding move. */
18377 else if (collisions > 1)
18378 {
18379 rtx base;
18380
18381 collisions = 1;
18382
18383 base = part[0][nparts - 1];
18384
18385 /* Handle the case when the last part isn't valid for lea.
18386 Happens in 64-bit mode storing the 12-byte XFmode. */
18387 if (GET_MODE (base) != Pmode)
18388 base = gen_rtx_REG (Pmode, REGNO (base));
18389
18390 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
18391 part[1][0] = replace_equiv_address (part[1][0], base);
18392 for (i = 1; i < nparts; i++)
18393 {
18394 tmp = plus_constant (base, UNITS_PER_WORD * i);
18395 part[1][i] = replace_equiv_address (part[1][i], tmp);
18396 }
18397 }
18398 }
18399
18400 if (push)
18401 {
18402 if (!TARGET_64BIT)
18403 {
18404 if (nparts == 3)
18405 {
18406 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
18407 emit_insn (gen_addsi3 (stack_pointer_rtx,
18408 stack_pointer_rtx, GEN_INT (-4)));
18409 emit_move_insn (part[0][2], part[1][2]);
18410 }
18411 else if (nparts == 4)
18412 {
18413 emit_move_insn (part[0][3], part[1][3]);
18414 emit_move_insn (part[0][2], part[1][2]);
18415 }
18416 }
18417 else
18418 {
18419 /* In 64-bit mode we don't have a 32-bit push available. If this is a
18420 register, that is OK - we will just use the larger counterpart. We also
18421 retype memory - this comes from an attempt to avoid a REX prefix on
18422 moving the second half of a TFmode value. */
18423 if (GET_MODE (part[1][1]) == SImode)
18424 {
18425 switch (GET_CODE (part[1][1]))
18426 {
18427 case MEM:
18428 part[1][1] = adjust_address (part[1][1], DImode, 0);
18429 break;
18430
18431 case REG:
18432 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
18433 break;
18434
18435 default:
18436 gcc_unreachable ();
18437 }
18438
18439 if (GET_MODE (part[1][0]) == SImode)
18440 part[1][0] = part[1][1];
18441 }
18442 }
18443 emit_move_insn (part[0][1], part[1][1]);
18444 emit_move_insn (part[0][0], part[1][0]);
18445 return;
18446 }
18447
18448 /* Choose correct order to not overwrite the source before it is copied. */
18449 if ((REG_P (part[0][0])
18450 && REG_P (part[1][1])
18451 && (REGNO (part[0][0]) == REGNO (part[1][1])
18452 || (nparts == 3
18453 && REGNO (part[0][0]) == REGNO (part[1][2]))
18454 || (nparts == 4
18455 && REGNO (part[0][0]) == REGNO (part[1][3]))))
18456 || (collisions > 0
18457 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
18458 {
18459 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
18460 {
18461 operands[2 + i] = part[0][j];
18462 operands[6 + i] = part[1][j];
18463 }
18464 }
18465 else
18466 {
18467 for (i = 0; i < nparts; i++)
18468 {
18469 operands[2 + i] = part[0][i];
18470 operands[6 + i] = part[1][i];
18471 }
18472 }
18473
18474 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
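/* That is, when a later source part is the same nonzero constant that an
earlier part already loaded into a destination register, copy from that
register instead of re-materializing the immediate. */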
18475 if (optimize_insn_for_size_p ())
18476 {
18477 for (j = 0; j < nparts - 1; j++)
18478 if (CONST_INT_P (operands[6 + j])
18479 && operands[6 + j] != const0_rtx
18480 && REG_P (operands[2 + j]))
18481 for (i = j; i < nparts - 1; i++)
18482 if (CONST_INT_P (operands[7 + i])
18483 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
18484 operands[7 + i] = operands[2 + j];
18485 }
18486
18487 for (i = 0; i < nparts; i++)
18488 emit_move_insn (operands[2 + i], operands[6 + i]);
18489
18490 return;
18491 }
18492
18493 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
18494 left shift by a constant, either using a single shift or
18495 a sequence of add instructions. */
18496
18497 static void
18498 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
18499 {
18500 rtx (*insn)(rtx, rtx, rtx);
18501
18502 if (count == 1
18503 || (count * ix86_cost->add <= ix86_cost->shift_const
18504 && !optimize_insn_for_size_p ()))
18505 {
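/* Implement the left shift as COUNT doublings (x << 1 == x + x); the
add insn is chosen for the half-width mode of the MODE being split. */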
18506 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
18507 while (count-- > 0)
18508 emit_insn (insn (operand, operand, operand));
18509 }
18510 else
18511 {
18512 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
18513 emit_insn (insn (operand, operand, GEN_INT (count)));
18514 }
18515 }
18516
18517 void
18518 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
18519 {
18520 rtx (*gen_ashl3)(rtx, rtx, rtx);
18521 rtx (*gen_shld)(rtx, rtx, rtx);
18522 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18523
18524 rtx low[2], high[2];
18525 int count;
18526
18527 if (CONST_INT_P (operands[2]))
18528 {
18529 split_double_mode (mode, operands, 2, low, high);
18530 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18531
18532 if (count >= half_width)
18533 {
18534 emit_move_insn (high[0], low[1]);
18535 emit_move_insn (low[0], const0_rtx);
18536
18537 if (count > half_width)
18538 ix86_expand_ashl_const (high[0], count - half_width, mode);
18539 }
18540 else
18541 {
18542 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
18543
18544 if (!rtx_equal_p (operands[0], operands[1]))
18545 emit_move_insn (operands[0], operands[1]);
18546
18547 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
18548 ix86_expand_ashl_const (low[0], count, mode);
18549 }
18550 return;
18551 }
18552
18553 split_double_mode (mode, operands, 1, low, high);
18554
18555 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
18556
18557 if (operands[1] == const1_rtx)
18558 {
18559 /* Assuming we've chosen QImode-capable registers, 1 << N
18560 can be done with two 32/64-bit shifts, no branches, no cmoves. */
18561 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
18562 {
18563 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
18564
18565 ix86_expand_clear (low[0]);
18566 ix86_expand_clear (high[0]);
18567 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
18568
18569 d = gen_lowpart (QImode, low[0]);
18570 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18571 s = gen_rtx_EQ (QImode, flags, const0_rtx);
18572 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18573
18574 d = gen_lowpart (QImode, high[0]);
18575 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18576 s = gen_rtx_NE (QImode, flags, const0_rtx);
18577 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18578 }
18579
18580 /* Otherwise, we can get the same results by manually performing
18581 a bit extract operation on bit 5/6, and then performing the two
18582 shifts. The two methods of getting 0/1 into low/high are exactly
18583 the same size. Avoiding the shift in the bit extract case helps
18584 pentium4 a bit; no one else seems to care much either way. */
18585 else
18586 {
18587 enum machine_mode half_mode;
18588 rtx (*gen_lshr3)(rtx, rtx, rtx);
18589 rtx (*gen_and3)(rtx, rtx, rtx);
18590 rtx (*gen_xor3)(rtx, rtx, rtx);
18591 HOST_WIDE_INT bits;
18592 rtx x;
18593
18594 if (mode == DImode)
18595 {
18596 half_mode = SImode;
18597 gen_lshr3 = gen_lshrsi3;
18598 gen_and3 = gen_andsi3;
18599 gen_xor3 = gen_xorsi3;
18600 bits = 5;
18601 }
18602 else
18603 {
18604 half_mode = DImode;
18605 gen_lshr3 = gen_lshrdi3;
18606 gen_and3 = gen_anddi3;
18607 gen_xor3 = gen_xordi3;
18608 bits = 6;
18609 }
18610
18611 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
18612 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
18613 else
18614 x = gen_lowpart (half_mode, operands[2]);
18615 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
18616
18617 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
18618 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
18619 emit_move_insn (low[0], high[0]);
18620 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
18621 }
18622
18623 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
18624 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
18625 return;
18626 }
18627
18628 if (operands[1] == constm1_rtx)
18629 {
18630 /* For -1 << N, we can avoid the shld instruction, because we
18631 know that we're shifting 0...31/63 ones into a -1. */
18632 emit_move_insn (low[0], constm1_rtx);
18633 if (optimize_insn_for_size_p ())
18634 emit_move_insn (high[0], low[0]);
18635 else
18636 emit_move_insn (high[0], constm1_rtx);
18637 }
18638 else
18639 {
18640 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
18641
18642 if (!rtx_equal_p (operands[0], operands[1]))
18643 emit_move_insn (operands[0], operands[1]);
18644
18645 split_double_mode (mode, operands, 1, low, high);
18646 emit_insn (gen_shld (high[0], low[0], operands[2]));
18647 }
18648
18649 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
18650
18651 if (TARGET_CMOVE && scratch)
18652 {
18653 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18654 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18655
18656 ix86_expand_clear (scratch);
18657 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
18658 }
18659 else
18660 {
18661 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
18662 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
18663
18664 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
18665 }
18666 }
18667
18668 void
18669 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
18670 {
18671 rtx (*gen_ashr3)(rtx, rtx, rtx)
18672 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
18673 rtx (*gen_shrd)(rtx, rtx, rtx);
18674 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18675
18676 rtx low[2], high[2];
18677 int count;
18678
18679 if (CONST_INT_P (operands[2]))
18680 {
18681 split_double_mode (mode, operands, 2, low, high);
18682 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18683
18684 if (count == GET_MODE_BITSIZE (mode) - 1)
18685 {
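/* A shift by the full width minus one leaves only copies of the sign
bit, so compute it once in the high half and copy it to the low half. */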
18686 emit_move_insn (high[0], high[1]);
18687 emit_insn (gen_ashr3 (high[0], high[0],
18688 GEN_INT (half_width - 1)));
18689 emit_move_insn (low[0], high[0]);
18690
18691 }
18692 else if (count >= half_width)
18693 {
18694 emit_move_insn (low[0], high[1]);
18695 emit_move_insn (high[0], low[0]);
18696 emit_insn (gen_ashr3 (high[0], high[0],
18697 GEN_INT (half_width - 1)));
18698
18699 if (count > half_width)
18700 emit_insn (gen_ashr3 (low[0], low[0],
18701 GEN_INT (count - half_width)));
18702 }
18703 else
18704 {
18705 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18706
18707 if (!rtx_equal_p (operands[0], operands[1]))
18708 emit_move_insn (operands[0], operands[1]);
18709
18710 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
18711 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
18712 }
18713 }
18714 else
18715 {
18716 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18717
18718 if (!rtx_equal_p (operands[0], operands[1]))
18719 emit_move_insn (operands[0], operands[1]);
18720
18721 split_double_mode (mode, operands, 1, low, high);
18722
18723 emit_insn (gen_shrd (low[0], high[0], operands[2]));
18724 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
18725
18726 if (TARGET_CMOVE && scratch)
18727 {
18728 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18729 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18730
18731 emit_move_insn (scratch, high[0]);
18732 emit_insn (gen_ashr3 (scratch, scratch,
18733 GEN_INT (half_width - 1)));
18734 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
18735 scratch));
18736 }
18737 else
18738 {
18739 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
18740 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
18741
18742 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
18743 }
18744 }
18745 }
18746
18747 void
18748 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
18749 {
18750 rtx (*gen_lshr3)(rtx, rtx, rtx)
18751 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
18752 rtx (*gen_shrd)(rtx, rtx, rtx);
18753 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18754
18755 rtx low[2], high[2];
18756 int count;
18757
18758 if (CONST_INT_P (operands[2]))
18759 {
18760 split_double_mode (mode, operands, 2, low, high);
18761 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18762
18763 if (count >= half_width)
18764 {
18765 emit_move_insn (low[0], high[1]);
18766 ix86_expand_clear (high[0]);
18767
18768 if (count > half_width)
18769 emit_insn (gen_lshr3 (low[0], low[0],
18770 GEN_INT (count - half_width)));
18771 }
18772 else
18773 {
18774 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18775
18776 if (!rtx_equal_p (operands[0], operands[1]))
18777 emit_move_insn (operands[0], operands[1]);
18778
18779 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
18780 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
18781 }
18782 }
18783 else
18784 {
18785 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18786
18787 if (!rtx_equal_p (operands[0], operands[1]))
18788 emit_move_insn (operands[0], operands[1]);
18789
18790 split_double_mode (mode, operands, 1, low, high);
18791
18792 emit_insn (gen_shrd (low[0], high[0], operands[2]));
18793 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
18794
18795 if (TARGET_CMOVE && scratch)
18796 {
18797 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18798 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18799
18800 ix86_expand_clear (scratch);
18801 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
18802 scratch));
18803 }
18804 else
18805 {
18806 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
18807 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
18808
18809 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
18810 }
18811 }
18812 }
18813
18814 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
18815 static void
18816 predict_jump (int prob)
18817 {
18818 rtx insn = get_last_insn ();
18819 gcc_assert (JUMP_P (insn));
18820 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
18821 }
18822
18823 /* Helper function for the string operations below. Test whether VARIABLE
18824 has the bits given by VALUE set; if not, jump to the returned label. */
18825 static rtx
18826 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
18827 {
18828 rtx label = gen_label_rtx ();
18829 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
18830 if (GET_MODE (variable) == DImode)
18831 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
18832 else
18833 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
18834 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
18835 1, label);
18836 if (epilogue)
18837 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18838 else
18839 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18840 return label;
18841 }
18842
18843 /* Decrease COUNTREG by VALUE. */
18844 static void
18845 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
18846 {
18847 rtx (*gen_add)(rtx, rtx, rtx)
18848 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
18849
18850 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
18851 }
18852
18853 /* Zero extend possibly SImode EXP to Pmode register. */
18854 rtx
18855 ix86_zero_extend_to_Pmode (rtx exp)
18856 {
18857 rtx r;
18858 if (GET_MODE (exp) == VOIDmode)
18859 return force_reg (Pmode, exp);
18860 if (GET_MODE (exp) == Pmode)
18861 return copy_to_mode_reg (Pmode, exp);
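/* Otherwise EXP is narrower than Pmode (an SImode value on a 64-bit
target), so widen it with an explicit zero extension. */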
18862 r = gen_reg_rtx (Pmode);
18863 emit_insn (gen_zero_extendsidi2 (r, exp));
18864 return r;
18865 }
18866
18867 /* Divide COUNTREG by SCALE. */
18868 static rtx
18869 scale_counter (rtx countreg, int scale)
18870 {
18871 rtx sc;
18872
18873 if (scale == 1)
18874 return countreg;
18875 if (CONST_INT_P (countreg))
18876 return GEN_INT (INTVAL (countreg) / scale);
18877 gcc_assert (REG_P (countreg));
18878
18879 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
18880 GEN_INT (exact_log2 (scale)),
18881 NULL, 1, OPTAB_DIRECT);
18882 return sc;
18883 }
18884
18885 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
18886 DImode for constant loop counts. */
18887
18888 static enum machine_mode
18889 counter_mode (rtx count_exp)
18890 {
18891 if (GET_MODE (count_exp) != VOIDmode)
18892 return GET_MODE (count_exp);
18893 if (!CONST_INT_P (count_exp))
18894 return Pmode;
18895 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
18896 return DImode;
18897 return SImode;
18898 }
18899
18900 /* When SRCPTR is non-NULL, output a simple loop that moves the memory
18901 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
18902 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
18903 the equivalent loop that sets memory to VALUE (supposed to be in MODE).
18904
18905 The size is rounded down to a whole number of chunks moved at once.
18906 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
18907
18908
18909 static void
18910 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
18911 rtx destptr, rtx srcptr, rtx value,
18912 rtx count, enum machine_mode mode, int unroll,
18913 int expected_size)
18914 {
18915 rtx out_label, top_label, iter, tmp;
18916 enum machine_mode iter_mode = counter_mode (count);
18917 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
18918 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
18919 rtx size;
18920 rtx x_addr;
18921 rtx y_addr;
18922 int i;
18923
18924 top_label = gen_label_rtx ();
18925 out_label = gen_label_rtx ();
18926 iter = gen_reg_rtx (iter_mode);
18927
18928 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
18929 NULL, 1, OPTAB_DIRECT);
18930 /* Those two should combine. */
18931 if (piece_size == const1_rtx)
18932 {
18933 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
18934 true, out_label);
18935 predict_jump (REG_BR_PROB_BASE * 10 / 100);
18936 }
18937 emit_move_insn (iter, const0_rtx);
18938
18939 emit_label (top_label);
18940
18941 tmp = convert_modes (Pmode, iter_mode, iter, true);
18942 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
18943 destmem = change_address (destmem, mode, x_addr);
18944
18945 if (srcmem)
18946 {
18947 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
18948 srcmem = change_address (srcmem, mode, y_addr);
18949
18950 /* When unrolling for chips that reorder memory reads and writes, we can
18951 save registers by using a single temporary; also, using 4 temporaries is
18952 overkill in 32-bit mode. (This variant is currently disabled below.) */
18953 if (!TARGET_64BIT && 0)
18954 {
18955 for (i = 0; i < unroll; i++)
18956 {
18957 if (i)
18958 {
18959 destmem =
18960 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18961 srcmem =
18962 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18963 }
18964 emit_move_insn (destmem, srcmem);
18965 }
18966 }
18967 else
18968 {
18969 rtx tmpreg[4];
18970 gcc_assert (unroll <= 4);
18971 for (i = 0; i < unroll; i++)
18972 {
18973 tmpreg[i] = gen_reg_rtx (mode);
18974 if (i)
18975 {
18976 srcmem =
18977 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18978 }
18979 emit_move_insn (tmpreg[i], srcmem);
18980 }
18981 for (i = 0; i < unroll; i++)
18982 {
18983 if (i)
18984 {
18985 destmem =
18986 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18987 }
18988 emit_move_insn (destmem, tmpreg[i]);
18989 }
18990 }
18991 }
18992 else
18993 for (i = 0; i < unroll; i++)
18994 {
18995 if (i)
18996 destmem =
18997 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18998 emit_move_insn (destmem, value);
18999 }
19000
19001 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19002 true, OPTAB_LIB_WIDEN);
19003 if (tmp != iter)
19004 emit_move_insn (iter, tmp);
19005
19006 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19007 true, top_label);
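/* Predict the back edge: with N expected iterations, the jump back to
TOP_LABEL is taken with probability about (N - 1) / N; the division
below rounds REG_BR_PROB_BASE / N to the nearest integer. */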
19008 if (expected_size != -1)
19009 {
19010 expected_size /= GET_MODE_SIZE (mode) * unroll;
19011 if (expected_size == 0)
19012 predict_jump (0);
19013 else if (expected_size > REG_BR_PROB_BASE)
19014 predict_jump (REG_BR_PROB_BASE - 1);
19015 else
19016 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19017 }
19018 else
19019 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19020 iter = ix86_zero_extend_to_Pmode (iter);
19021 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19022 true, OPTAB_LIB_WIDEN);
19023 if (tmp != destptr)
19024 emit_move_insn (destptr, tmp);
19025 if (srcptr)
19026 {
19027 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19028 true, OPTAB_LIB_WIDEN);
19029 if (tmp != srcptr)
19030 emit_move_insn (srcptr, tmp);
19031 }
19032 emit_label (out_label);
19033 }
19034
19035 /* Output "rep; mov" instruction.
19036 Arguments have the same meaning as for the previous function. */
19037 static void
19038 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19039 rtx destptr, rtx srcptr,
19040 rtx count,
19041 enum machine_mode mode)
19042 {
19043 rtx destexp;
19044 rtx srcexp;
19045 rtx countreg;
19046
19047 /* If the size is known, it is shorter to use rep movs. */
19048 if (mode == QImode && CONST_INT_P (count)
19049 && !(INTVAL (count) & 3))
19050 mode = SImode;
19051
19052 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19053 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19054 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19055 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19056 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
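/* DESTEXP and SRCEXP describe the final values of the destination and
source pointers (pointer plus the byte count); the rep_mov pattern uses
them to model how rep movs advances the pointer registers. */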
19057 if (mode != QImode)
19058 {
19059 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19060 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19061 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19062 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19063 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19064 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19065 }
19066 else
19067 {
19068 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19069 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19070 }
19071 if (CONST_INT_P (count))
19072 {
19073 count = GEN_INT (INTVAL (count)
19074 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19075 destmem = shallow_copy_rtx (destmem);
19076 srcmem = shallow_copy_rtx (srcmem);
19077 set_mem_size (destmem, count);
19078 set_mem_size (srcmem, count);
19079 }
19080 else
19081 {
19082 if (MEM_SIZE (destmem))
19083 set_mem_size (destmem, NULL_RTX);
19084 if (MEM_SIZE (srcmem))
19085 set_mem_size (srcmem, NULL_RTX);
19086 }
19087 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
19088 destexp, srcexp));
19089 }
19090
19091 /* Output "rep; stos" instruction.
19092 Arguments have the same meaning as for the previous function. */
19093 static void
19094 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19095 rtx count, enum machine_mode mode,
19096 rtx orig_value)
19097 {
19098 rtx destexp;
19099 rtx countreg;
19100
19101 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19102 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19103 value = force_reg (mode, gen_lowpart (mode, value));
19104 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19105 if (mode != QImode)
19106 {
19107 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19108 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19109 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19110 }
19111 else
19112 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19113 if (orig_value == const0_rtx && CONST_INT_P (count))
19114 {
19115 count = GEN_INT (INTVAL (count)
19116 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19117 destmem = shallow_copy_rtx (destmem);
19118 set_mem_size (destmem, count);
19119 }
19120 else if (MEM_SIZE (destmem))
19121 set_mem_size (destmem, NULL_RTX);
19122 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19123 }
19124
19125 static void
19126 emit_strmov (rtx destmem, rtx srcmem,
19127 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19128 {
19129 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19130 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19131 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19132 }
19133
19134 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
19135 static void
19136 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19137 rtx destptr, rtx srcptr, rtx count, int max_size)
19138 {
19139 rtx src, dest;
19140 if (CONST_INT_P (count))
19141 {
19142 HOST_WIDE_INT countval = INTVAL (count);
19143 int offset = 0;
19144
19145 if ((countval & 0x10) && max_size > 16)
19146 {
19147 if (TARGET_64BIT)
19148 {
19149 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19150 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19151 }
19152 else
19153 gcc_unreachable ();
19154 offset += 16;
19155 }
19156 if ((countval & 0x08) && max_size > 8)
19157 {
19158 if (TARGET_64BIT)
19159 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19160 else
19161 {
19162 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19163 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19164 }
19165 offset += 8;
19166 }
19167 if ((countval & 0x04) && max_size > 4)
19168 {
19169 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19170 offset += 4;
19171 }
19172 if ((countval & 0x02) && max_size > 2)
19173 {
19174 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19175 offset += 2;
19176 }
19177 if ((countval & 0x01) && max_size > 1)
19178 {
19179 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19180 offset += 1;
19181 }
19182 return;
19183 }
19184 if (max_size > 8)
19185 {
19186 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19187 count, 1, OPTAB_DIRECT);
19188 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19189 count, QImode, 1, 4);
19190 return;
19191 }
19192
19193 /* When single string operations are available, we can cheaply advance the
19194 dest and src pointers. Otherwise we save code size by maintaining an offset
19195 (zero is readily available from the preceding rep operation) and using x86
19196 addressing modes. */
19197 if (TARGET_SINGLE_STRINGOP)
19198 {
19199 if (max_size > 4)
19200 {
19201 rtx label = ix86_expand_aligntest (count, 4, true);
19202 src = change_address (srcmem, SImode, srcptr);
19203 dest = change_address (destmem, SImode, destptr);
19204 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19205 emit_label (label);
19206 LABEL_NUSES (label) = 1;
19207 }
19208 if (max_size > 2)
19209 {
19210 rtx label = ix86_expand_aligntest (count, 2, true);
19211 src = change_address (srcmem, HImode, srcptr);
19212 dest = change_address (destmem, HImode, destptr);
19213 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19214 emit_label (label);
19215 LABEL_NUSES (label) = 1;
19216 }
19217 if (max_size > 1)
19218 {
19219 rtx label = ix86_expand_aligntest (count, 1, true);
19220 src = change_address (srcmem, QImode, srcptr);
19221 dest = change_address (destmem, QImode, destptr);
19222 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19223 emit_label (label);
19224 LABEL_NUSES (label) = 1;
19225 }
19226 }
19227 else
19228 {
19229 rtx offset = force_reg (Pmode, const0_rtx);
19230 rtx tmp;
19231
19232 if (max_size > 4)
19233 {
19234 rtx label = ix86_expand_aligntest (count, 4, true);
19235 src = change_address (srcmem, SImode, srcptr);
19236 dest = change_address (destmem, SImode, destptr);
19237 emit_move_insn (dest, src);
19238 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
19239 true, OPTAB_LIB_WIDEN);
19240 if (tmp != offset)
19241 emit_move_insn (offset, tmp);
19242 emit_label (label);
19243 LABEL_NUSES (label) = 1;
19244 }
19245 if (max_size > 2)
19246 {
19247 rtx label = ix86_expand_aligntest (count, 2, true);
19248 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19249 src = change_address (srcmem, HImode, tmp);
19250 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19251 dest = change_address (destmem, HImode, tmp);
19252 emit_move_insn (dest, src);
19253 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
19254 true, OPTAB_LIB_WIDEN);
19255 if (tmp != offset)
19256 emit_move_insn (offset, tmp);
19257 emit_label (label);
19258 LABEL_NUSES (label) = 1;
19259 }
19260 if (max_size > 1)
19261 {
19262 rtx label = ix86_expand_aligntest (count, 1, true);
19263 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19264 src = change_address (srcmem, QImode, tmp);
19265 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19266 dest = change_address (destmem, QImode, tmp);
19267 emit_move_insn (dest, src);
19268 emit_label (label);
19269 LABEL_NUSES (label) = 1;
19270 }
19271 }
19272 }
19273
19274 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19275 static void
19276 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
19277 rtx count, int max_size)
19278 {
19279 count =
19280 expand_simple_binop (counter_mode (count), AND, count,
19281 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
19282 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
19283 gen_lowpart (QImode, value), count, QImode,
19284 1, max_size / 2);
19285 }
19286
19287 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19288 static void
19289 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
19290 {
19291 rtx dest;
19292
19293 if (CONST_INT_P (count))
19294 {
19295 HOST_WIDE_INT countval = INTVAL (count);
19296 int offset = 0;
19297
19298 if ((countval & 0x10) && max_size > 16)
19299 {
19300 if (TARGET_64BIT)
19301 {
19302 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19303 emit_insn (gen_strset (destptr, dest, value));
19304 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
19305 emit_insn (gen_strset (destptr, dest, value));
19306 }
19307 else
19308 gcc_unreachable ();
19309 offset += 16;
19310 }
19311 if ((countval & 0x08) && max_size > 8)
19312 {
19313 if (TARGET_64BIT)
19314 {
19315 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19316 emit_insn (gen_strset (destptr, dest, value));
19317 }
19318 else
19319 {
19320 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19321 emit_insn (gen_strset (destptr, dest, value));
19322 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
19323 emit_insn (gen_strset (destptr, dest, value));
19324 }
19325 offset += 8;
19326 }
19327 if ((countval & 0x04) && max_size > 4)
19328 {
19329 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19330 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19331 offset += 4;
19332 }
19333 if ((countval & 0x02) && max_size > 2)
19334 {
19335 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
19336 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19337 offset += 2;
19338 }
19339 if ((countval & 0x01) && max_size > 1)
19340 {
19341 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
19342 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19343 offset += 1;
19344 }
19345 return;
19346 }
19347 if (max_size > 32)
19348 {
19349 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
19350 return;
19351 }
19352 if (max_size > 16)
19353 {
19354 rtx label = ix86_expand_aligntest (count, 16, true);
19355 if (TARGET_64BIT)
19356 {
19357 dest = change_address (destmem, DImode, destptr);
19358 emit_insn (gen_strset (destptr, dest, value));
19359 emit_insn (gen_strset (destptr, dest, value));
19360 }
19361 else
19362 {
19363 dest = change_address (destmem, SImode, destptr);
19364 emit_insn (gen_strset (destptr, dest, value));
19365 emit_insn (gen_strset (destptr, dest, value));
19366 emit_insn (gen_strset (destptr, dest, value));
19367 emit_insn (gen_strset (destptr, dest, value));
19368 }
19369 emit_label (label);
19370 LABEL_NUSES (label) = 1;
19371 }
19372 if (max_size > 8)
19373 {
19374 rtx label = ix86_expand_aligntest (count, 8, true);
19375 if (TARGET_64BIT)
19376 {
19377 dest = change_address (destmem, DImode, destptr);
19378 emit_insn (gen_strset (destptr, dest, value));
19379 }
19380 else
19381 {
19382 dest = change_address (destmem, SImode, destptr);
19383 emit_insn (gen_strset (destptr, dest, value));
19384 emit_insn (gen_strset (destptr, dest, value));
19385 }
19386 emit_label (label);
19387 LABEL_NUSES (label) = 1;
19388 }
19389 if (max_size > 4)
19390 {
19391 rtx label = ix86_expand_aligntest (count, 4, true);
19392 dest = change_address (destmem, SImode, destptr);
19393 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19394 emit_label (label);
19395 LABEL_NUSES (label) = 1;
19396 }
19397 if (max_size > 2)
19398 {
19399 rtx label = ix86_expand_aligntest (count, 2, true);
19400 dest = change_address (destmem, HImode, destptr);
19401 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19402 emit_label (label);
19403 LABEL_NUSES (label) = 1;
19404 }
19405 if (max_size > 1)
19406 {
19407 rtx label = ix86_expand_aligntest (count, 1, true);
19408 dest = change_address (destmem, QImode, destptr);
19409 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19410 emit_label (label);
19411 LABEL_NUSES (label) = 1;
19412 }
19413 }
19414
19415 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
19416 by ALIGN, to DESIRED_ALIGNMENT. */
19417 static void
19418 expand_movmem_prologue (rtx destmem, rtx srcmem,
19419 rtx destptr, rtx srcptr, rtx count,
19420 int align, int desired_alignment)
19421 {
19422 if (align <= 1 && desired_alignment > 1)
19423 {
19424 rtx label = ix86_expand_aligntest (destptr, 1, false);
19425 srcmem = change_address (srcmem, QImode, srcptr);
19426 destmem = change_address (destmem, QImode, destptr);
19427 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19428 ix86_adjust_counter (count, 1);
19429 emit_label (label);
19430 LABEL_NUSES (label) = 1;
19431 }
19432 if (align <= 2 && desired_alignment > 2)
19433 {
19434 rtx label = ix86_expand_aligntest (destptr, 2, false);
19435 srcmem = change_address (srcmem, HImode, srcptr);
19436 destmem = change_address (destmem, HImode, destptr);
19437 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19438 ix86_adjust_counter (count, 2);
19439 emit_label (label);
19440 LABEL_NUSES (label) = 1;
19441 }
19442 if (align <= 4 && desired_alignment > 4)
19443 {
19444 rtx label = ix86_expand_aligntest (destptr, 4, false);
19445 srcmem = change_address (srcmem, SImode, srcptr);
19446 destmem = change_address (destmem, SImode, destptr);
19447 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19448 ix86_adjust_counter (count, 4);
19449 emit_label (label);
19450 LABEL_NUSES (label) = 1;
19451 }
19452 gcc_assert (desired_alignment <= 8);
19453 }
19454
19455 /* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
19456 ALIGN_BYTES is how many bytes need to be copied. */
19457 static rtx
19458 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
19459 int desired_align, int align_bytes)
19460 {
19461 rtx src = *srcp;
19462 rtx src_size, dst_size;
19463 int off = 0;
19464 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
19465 if (src_align_bytes >= 0)
19466 src_align_bytes = desired_align - src_align_bytes;
19467 src_size = MEM_SIZE (src);
19468 dst_size = MEM_SIZE (dst);
19469 if (align_bytes & 1)
19470 {
19471 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19472 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
19473 off = 1;
19474 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19475 }
19476 if (align_bytes & 2)
19477 {
19478 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19479 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
19480 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19481 set_mem_align (dst, 2 * BITS_PER_UNIT);
19482 if (src_align_bytes >= 0
19483 && (src_align_bytes & 1) == (align_bytes & 1)
19484 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
19485 set_mem_align (src, 2 * BITS_PER_UNIT);
19486 off = 2;
19487 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19488 }
19489 if (align_bytes & 4)
19490 {
19491 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19492 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
19493 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19494 set_mem_align (dst, 4 * BITS_PER_UNIT);
19495 if (src_align_bytes >= 0)
19496 {
19497 unsigned int src_align = 0;
19498 if ((src_align_bytes & 3) == (align_bytes & 3))
19499 src_align = 4;
19500 else if ((src_align_bytes & 1) == (align_bytes & 1))
19501 src_align = 2;
19502 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
19503 set_mem_align (src, src_align * BITS_PER_UNIT);
19504 }
19505 off = 4;
19506 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19507 }
19508 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19509 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
19510 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19511 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19512 if (src_align_bytes >= 0)
19513 {
19514 unsigned int src_align = 0;
19515 if ((src_align_bytes & 7) == (align_bytes & 7))
19516 src_align = 8;
19517 else if ((src_align_bytes & 3) == (align_bytes & 3))
19518 src_align = 4;
19519 else if ((src_align_bytes & 1) == (align_bytes & 1))
19520 src_align = 2;
19521 if (src_align > (unsigned int) desired_align)
19522 src_align = desired_align;
19523 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
19524 set_mem_align (src, src_align * BITS_PER_UNIT);
19525 }
19526 if (dst_size)
19527 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19528 if (src_size)
19529 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
19530 *srcp = src;
19531 return dst;
19532 }
19533
19534 /* Store enough bytes at DEST to align DEST, known to be aligned by ALIGN,
19535 to DESIRED_ALIGNMENT. */
19536 static void
19537 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
19538 int align, int desired_alignment)
19539 {
19540 if (align <= 1 && desired_alignment > 1)
19541 {
19542 rtx label = ix86_expand_aligntest (destptr, 1, false);
19543 destmem = change_address (destmem, QImode, destptr);
19544 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
19545 ix86_adjust_counter (count, 1);
19546 emit_label (label);
19547 LABEL_NUSES (label) = 1;
19548 }
19549 if (align <= 2 && desired_alignment > 2)
19550 {
19551 rtx label = ix86_expand_aligntest (destptr, 2, false);
19552 destmem = change_address (destmem, HImode, destptr);
19553 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
19554 ix86_adjust_counter (count, 2);
19555 emit_label (label);
19556 LABEL_NUSES (label) = 1;
19557 }
19558 if (align <= 4 && desired_alignment > 4)
19559 {
19560 rtx label = ix86_expand_aligntest (destptr, 4, false);
19561 destmem = change_address (destmem, SImode, destptr);
19562 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
19563 ix86_adjust_counter (count, 4);
19564 emit_label (label);
19565 LABEL_NUSES (label) = 1;
19566 }
19567 gcc_assert (desired_alignment <= 8);
19568 }
19569
19570 /* Store enough bytes at DST to align DST, known to be aligned by ALIGN,
19571 to DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
19572 static rtx
19573 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
19574 int desired_align, int align_bytes)
19575 {
19576 int off = 0;
19577 rtx dst_size = MEM_SIZE (dst);
19578 if (align_bytes & 1)
19579 {
19580 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19581 off = 1;
19582 emit_insn (gen_strset (destreg, dst,
19583 gen_lowpart (QImode, value)));
19584 }
19585 if (align_bytes & 2)
19586 {
19587 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19588 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19589 set_mem_align (dst, 2 * BITS_PER_UNIT);
19590 off = 2;
19591 emit_insn (gen_strset (destreg, dst,
19592 gen_lowpart (HImode, value)));
19593 }
19594 if (align_bytes & 4)
19595 {
19596 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19597 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19598 set_mem_align (dst, 4 * BITS_PER_UNIT);
19599 off = 4;
19600 emit_insn (gen_strset (destreg, dst,
19601 gen_lowpart (SImode, value)));
19602 }
19603 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19604 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19605 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19606 if (dst_size)
19607 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19608 return dst;
19609 }
19610
19611 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
19612 static enum stringop_alg
19613 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
19614 int *dynamic_check)
19615 {
19616 const struct stringop_algs * algs;
19617 bool optimize_for_speed;
19618 /* Algorithms using the rep prefix want at least edi and ecx;
19619 additionally, memset wants eax and memcpy wants esi. Don't
19620 consider such algorithms if the user has appropriated those
19621 registers for their own purposes. */
19622 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
19623 || (memset
19624 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
19625
19626 #define ALG_USABLE_P(alg) (rep_prefix_usable \
19627 || (alg != rep_prefix_1_byte \
19628 && alg != rep_prefix_4_byte \
19629 && alg != rep_prefix_8_byte))
19630 const struct processor_costs *cost;
19631
19632 /* Even if the string operation call is cold, we still might spend a lot
19633 of time processing large blocks. */
19634 if (optimize_function_for_size_p (cfun)
19635 || (optimize_insn_for_size_p ()
19636 && expected_size != -1 && expected_size < 256))
19637 optimize_for_speed = false;
19638 else
19639 optimize_for_speed = true;
19640
19641 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
19642
19643 *dynamic_check = -1;
19644 if (memset)
19645 algs = &cost->memset[TARGET_64BIT != 0];
19646 else
19647 algs = &cost->memcpy[TARGET_64BIT != 0];
19648 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
19649 return stringop_alg;
19650 /* rep; movq or rep; movl is the smallest variant. */
19651 else if (!optimize_for_speed)
19652 {
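/* A count that is unknown or not a multiple of 4 needs the byte variant;
otherwise the 4-byte variant moves the same data in fewer iterations. */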
19653 if (!count || (count & 3))
19654 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
19655 else
19656 return rep_prefix_usable ? rep_prefix_4_byte : loop;
19657 }
19658 /* Very tiny blocks are best handled via the loop; REP is expensive to
19659 set up. */
19660 else if (expected_size != -1 && expected_size < 4)
19661 return loop_1_byte;
19662 else if (expected_size != -1)
19663 {
19664 unsigned int i;
19665 enum stringop_alg alg = libcall;
19666 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19667 {
19668 /* We get here if the algorithms that were not libcall-based
19669 were rep-prefix based and we are unable to use rep prefixes
19670 based on global register usage. Break out of the loop and
19671 use the heuristic below. */
19672 if (algs->size[i].max == 0)
19673 break;
19674 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
19675 {
19676 enum stringop_alg candidate = algs->size[i].alg;
19677
19678 if (candidate != libcall && ALG_USABLE_P (candidate))
19679 alg = candidate;
19680 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
19681 last non-libcall inline algorithm. */
19682 if (TARGET_INLINE_ALL_STRINGOPS)
19683 {
19684 /* When the current size is best copied by a libcall, but we
19685 are still forced to inline, run the heuristic below, which
19686 picks code for medium-sized blocks. */
19687 if (alg != libcall)
19688 return alg;
19689 break;
19690 }
19691 else if (ALG_USABLE_P (candidate))
19692 return candidate;
19693 }
19694 }
19695 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
19696 }
19697 /* When asked to inline the call anyway, try to pick a meaningful choice.
19698 We look for the maximal size of block that is faster to copy by hand and
19699 handle blocks of at most that size, guessing that the average size will
19700 be roughly half of the maximum.
19701
19702 If this turns out to be bad, we might simply specify the preferred
19703 choice in ix86_costs. */
19704 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19705 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
19706 {
19707 int max = -1;
19708 enum stringop_alg alg;
19709 int i;
19710 bool any_alg_usable_p = true;
19711
19712 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19713 {
19714 enum stringop_alg candidate = algs->size[i].alg;
19715 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
19716
19717 if (candidate != libcall && candidate
19718 && ALG_USABLE_P (candidate))
19719 max = algs->size[i].max;
19720 }
19721 /* If there aren't any usable algorithms, then recursing on
19722 smaller sizes isn't going to find anything. Just return the
19723 simple byte-at-a-time copy loop. */
19724 if (!any_alg_usable_p)
19725 {
19726 /* Pick something reasonable. */
19727 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19728 *dynamic_check = 128;
19729 return loop_1_byte;
19730 }
19731 if (max == -1)
19732 max = 4096;
19733 alg = decide_alg (count, max / 2, memset, dynamic_check);
19734 gcc_assert (*dynamic_check == -1);
19735 gcc_assert (alg != libcall);
19736 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19737 *dynamic_check = max;
19738 return alg;
19739 }
19740 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
19741 #undef ALG_USABLE_P
19742 }
19743
19744 /* Decide on alignment. We know that the operand is already aligned to ALIGN
19745 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
19746 static int
19747 decide_alignment (int align,
19748 enum stringop_alg alg,
19749 int expected_size)
19750 {
19751 int desired_align = 0;
19752 switch (alg)
19753 {
19754 case no_stringop:
19755 gcc_unreachable ();
19756 case loop:
19757 case unrolled_loop:
19758 desired_align = GET_MODE_SIZE (Pmode);
19759 break;
19760 case rep_prefix_8_byte:
19761 desired_align = 8;
19762 break;
19763 case rep_prefix_4_byte:
19764 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
19765 copying a whole cacheline at once. */
19766 if (TARGET_PENTIUMPRO)
19767 desired_align = 8;
19768 else
19769 desired_align = 4;
19770 break;
19771 case rep_prefix_1_byte:
19772 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
19773 copying a whole cacheline at once. */
19774 if (TARGET_PENTIUMPRO)
19775 desired_align = 8;
19776 else
19777 desired_align = 1;
19778 break;
19779 case loop_1_byte:
19780 desired_align = 1;
19781 break;
19782 case libcall:
19783 return 0;
19784 }
19785
19786 if (optimize_size)
19787 desired_align = 1;
19788 if (desired_align < align)
19789 desired_align = align;
19790 if (expected_size != -1 && expected_size < 4)
19791 desired_align = align;
19792 return desired_align;
19793 }
19794
19795 /* Return the smallest power of 2 greater than VAL. */
19796 static int
19797 smallest_pow2_greater_than (int val)
19798 {
19799 int ret = 1;
19800 while (ret <= val)
19801 ret <<= 1;
19802 return ret;
19803 }
19804
19805 /* Expand string move (memcpy) operation. Use i386 string operations when
19806 profitable. expand_setmem contains similar code. The code depends upon
19807 architecture, block size and alignment, but always has the same
19808 overall structure:
19809
19810 1) Prologue guard: Conditional that jumps up to epilogues for small
19811 blocks that can be handled by the epilogue alone. This is faster but
19812 also needed for correctness, since the prologue assumes the block is
19813 larger than the desired alignment.
19814
19815 Optional dynamic check for size and libcall for large
19816 blocks is emitted here too, with -minline-stringops-dynamically.
19817
19818 2) Prologue: copy the first few bytes in order to get the destination
19819 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
19820 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
19821 We emit either a jump tree for power-of-two sized blocks, or a byte loop.
19822
19823 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
19824 with specified algorithm.
19825
19826 4) Epilogue: code copying tail of the block that is too small to be
19827 handled by main body (or up to size guarded by prologue guard). */
19828
19829 bool
19830 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
19831 rtx expected_align_exp, rtx expected_size_exp)
19832 {
19833 rtx destreg;
19834 rtx srcreg;
19835 rtx label = NULL;
19836 rtx tmp;
19837 rtx jump_around_label = NULL;
19838 HOST_WIDE_INT align = 1;
19839 unsigned HOST_WIDE_INT count = 0;
19840 HOST_WIDE_INT expected_size = -1;
19841 int size_needed = 0, epilogue_size_needed;
19842 int desired_align = 0, align_bytes = 0;
19843 enum stringop_alg alg;
19844 int dynamic_check;
19845 bool need_zero_guard = false;
19846
19847 if (CONST_INT_P (align_exp))
19848 align = INTVAL (align_exp);
19849 /* i386 can do misaligned access at reasonably increased cost. */
19850 if (CONST_INT_P (expected_align_exp)
19851 && INTVAL (expected_align_exp) > align)
19852 align = INTVAL (expected_align_exp);
19853 /* ALIGN is the minimum of destination and source alignment, but we care here
19854 just about destination alignment. */
19855 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
19856 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
19857
19858 if (CONST_INT_P (count_exp))
19859 count = expected_size = INTVAL (count_exp);
19860 if (CONST_INT_P (expected_size_exp) && count == 0)
19861 expected_size = INTVAL (expected_size_exp);
19862
19863 /* Make sure we don't need to care about overflow later on. */
19864 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19865 return false;
19866
19867 /* Step 0: Decide on preferred algorithm, desired alignment and
19868 size of chunks to be copied by main loop. */
19869
19870 alg = decide_alg (count, expected_size, false, &dynamic_check);
19871 desired_align = decide_alignment (align, alg, expected_size);
19872
19873 if (!TARGET_ALIGN_STRINGOPS)
19874 align = desired_align;
19875
19876 if (alg == libcall)
19877 return false;
19878 gcc_assert (alg != no_stringop);
19879 if (!count)
19880 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
19881 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19882 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
19883 switch (alg)
19884 {
19885 case libcall:
19886 case no_stringop:
19887 gcc_unreachable ();
19888 case loop:
19889 need_zero_guard = true;
19890 size_needed = GET_MODE_SIZE (Pmode);
19891 break;
19892 case unrolled_loop:
19893 need_zero_guard = true;
19894 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
19895 break;
19896 case rep_prefix_8_byte:
19897 size_needed = 8;
19898 break;
19899 case rep_prefix_4_byte:
19900 size_needed = 4;
19901 break;
19902 case rep_prefix_1_byte:
19903 size_needed = 1;
19904 break;
19905 case loop_1_byte:
19906 need_zero_guard = true;
19907 size_needed = 1;
19908 break;
19909 }
19910
19911 epilogue_size_needed = size_needed;
19912
19913 /* Step 1: Prologue guard. */
19914
19915 /* Alignment code needs the count to be in a register. */
19916 if (CONST_INT_P (count_exp) && desired_align > align)
19917 {
19918 if (INTVAL (count_exp) > desired_align
19919 && INTVAL (count_exp) > size_needed)
19920 {
19921 align_bytes
19922 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19923 if (align_bytes <= 0)
19924 align_bytes = 0;
19925 else
19926 align_bytes = desired_align - align_bytes;
19927 }
19928 if (align_bytes == 0)
19929 count_exp = force_reg (counter_mode (count_exp), count_exp);
19930 }
19931 gcc_assert (desired_align >= 1 && align >= 1);
19932
19933 /* Ensure that alignment prologue won't copy past end of block. */
19934 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19935 {
19936 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19937 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
19938 Make sure it is power of 2. */
19939 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19940
19941 if (count)
19942 {
19943 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19944 {
19945 /* If main algorithm works on QImode, no epilogue is needed.
19946 For small sizes just don't align anything. */
19947 if (size_needed == 1)
19948 desired_align = align;
19949 else
19950 goto epilogue;
19951 }
19952 }
19953 else
19954 {
19955 label = gen_label_rtx ();
19956 emit_cmp_and_jump_insns (count_exp,
19957 GEN_INT (epilogue_size_needed),
19958 LTU, 0, counter_mode (count_exp), 1, label);
19959 if (expected_size == -1 || expected_size < epilogue_size_needed)
19960 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19961 else
19962 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19963 }
19964 }
19965
19966 /* Emit code to decide at runtime whether a library call or inline code
19967 should be used. */
19968 if (dynamic_check != -1)
19969 {
19970 if (CONST_INT_P (count_exp))
19971 {
19972 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
19973 {
19974 emit_block_move_via_libcall (dst, src, count_exp, false);
19975 count_exp = const0_rtx;
19976 goto epilogue;
19977 }
19978 }
19979 else
19980 {
19981 rtx hot_label = gen_label_rtx ();
19982 jump_around_label = gen_label_rtx ();
19983 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19984 LEU, 0, GET_MODE (count_exp), 1, hot_label);
19985 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19986 emit_block_move_via_libcall (dst, src, count_exp, false);
19987 emit_jump (jump_around_label);
19988 emit_label (hot_label);
19989 }
19990 }
19991
19992 /* Step 2: Alignment prologue. */
19993
19994 if (desired_align > align)
19995 {
19996 if (align_bytes == 0)
19997 {
19998 /* Except for the first move in the epilogue, we no longer know
19999 the constant offset in the aliasing info. It doesn't seem worth
20000 the pain to maintain it for the first move, so throw away
20001 the info early. */
20002 src = change_address (src, BLKmode, srcreg);
20003 dst = change_address (dst, BLKmode, destreg);
20004 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20005 desired_align);
20006 }
20007 else
20008 {
20009 /* If we know how many bytes need to be stored before dst is
20010 sufficiently aligned, maintain aliasing info accurately. */
20011 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20012 desired_align, align_bytes);
20013 count_exp = plus_constant (count_exp, -align_bytes);
20014 count -= align_bytes;
20015 }
20016 if (need_zero_guard
20017 && (count < (unsigned HOST_WIDE_INT) size_needed
20018 || (align_bytes == 0
20019 && count < ((unsigned HOST_WIDE_INT) size_needed
20020 + desired_align - align))))
20021 {
20022 /* It is possible that we copied enough so the main loop will not
20023 execute. */
20024 gcc_assert (size_needed > 1);
20025 if (label == NULL_RTX)
20026 label = gen_label_rtx ();
20027 emit_cmp_and_jump_insns (count_exp,
20028 GEN_INT (size_needed),
20029 LTU, 0, counter_mode (count_exp), 1, label);
20030 if (expected_size == -1
20031 || expected_size < (desired_align - align) / 2 + size_needed)
20032 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20033 else
20034 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20035 }
20036 }
20037 if (label && size_needed == 1)
20038 {
20039 emit_label (label);
20040 LABEL_NUSES (label) = 1;
20041 label = NULL;
20042 epilogue_size_needed = 1;
20043 }
20044 else if (label == NULL_RTX)
20045 epilogue_size_needed = size_needed;
20046
20047 /* Step 3: Main loop. */
20048
20049 switch (alg)
20050 {
20051 case libcall:
20052 case no_stringop:
20053 gcc_unreachable ();
20054 case loop_1_byte:
20055 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20056 count_exp, QImode, 1, expected_size);
20057 break;
20058 case loop:
20059 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20060 count_exp, Pmode, 1, expected_size);
20061 break;
20062 case unrolled_loop:
20063 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
20064 registers for 4 temporaries anyway. */
20065 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20066 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20067 expected_size);
20068 break;
20069 case rep_prefix_8_byte:
20070 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20071 DImode);
20072 break;
20073 case rep_prefix_4_byte:
20074 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20075 SImode);
20076 break;
20077 case rep_prefix_1_byte:
20078 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20079 QImode);
20080 break;
20081 }
20082 /* Properly adjust the offset of src and dest memory for aliasing. */
20083 if (CONST_INT_P (count_exp))
20084 {
20085 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20086 (count / size_needed) * size_needed);
20087 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20088 (count / size_needed) * size_needed);
20089 }
20090 else
20091 {
20092 src = change_address (src, BLKmode, srcreg);
20093 dst = change_address (dst, BLKmode, destreg);
20094 }
20095
20096 /* Step 4: Epilogue to copy the remaining bytes. */
20097 epilogue:
20098 if (label)
20099 {
20100 /* When the main loop is done, COUNT_EXP might hold original count,
20101 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20102 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20103 bytes. Compensate if needed. */
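/* For example, with SIZE_NEEDED == 4 and EPILOGUE_SIZE_NEEDED == 8, a
   count of 13 leaves 13 & 3 == 1 byte after the main loop, while an
   uncompensated epilogue would try to copy 13 & 7 == 5 bytes; masking
   COUNT_EXP with SIZE_NEEDED - 1 below avoids that. */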
20104
20105 if (size_needed < epilogue_size_needed)
20106 {
20107 tmp =
20108 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20109 GEN_INT (size_needed - 1), count_exp, 1,
20110 OPTAB_DIRECT);
20111 if (tmp != count_exp)
20112 emit_move_insn (count_exp, tmp);
20113 }
20114 emit_label (label);
20115 LABEL_NUSES (label) = 1;
20116 }
20117
20118 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20119 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20120 epilogue_size_needed);
20121 if (jump_around_label)
20122 emit_label (jump_around_label);
20123 return true;
20124 }
20125
20126 /* Helper function for memset (setmem). For QImode value 0xXY produce
20127 0xXYXYXYXY of the width specified by MODE. This is essentially
20128 a multiplication by 0x01010101, but we can do slightly better than
20129 synth_mult by unwinding the sequence by hand on CPUs with a
20130 slow multiply. */
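/* For instance, with VAL == 0x5A and MODE == SImode the constant path
   below computes v = 0x5A, v |= v << 8 -> 0x5A5A, v |= v << 16 ->
   0x5A5A5A5A; the register path builds the same value either with a
   single multiply by 0x01010101 or with insv / shift-and-or steps. */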
20131 static rtx
20132 promote_duplicated_reg (enum machine_mode mode, rtx val)
20133 {
20134 enum machine_mode valmode = GET_MODE (val);
20135 rtx tmp;
20136 int nops = mode == DImode ? 3 : 2;
20137
20138 gcc_assert (mode == SImode || mode == DImode);
20139 if (val == const0_rtx)
20140 return copy_to_mode_reg (mode, const0_rtx);
20141 if (CONST_INT_P (val))
20142 {
20143 HOST_WIDE_INT v = INTVAL (val) & 255;
20144
20145 v |= v << 8;
20146 v |= v << 16;
20147 if (mode == DImode)
20148 v |= (v << 16) << 16;
20149 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20150 }
20151
20152 if (valmode == VOIDmode)
20153 valmode = QImode;
20154 if (valmode != QImode)
20155 val = gen_lowpart (QImode, val);
20156 if (mode == QImode)
20157 return val;
20158 if (!TARGET_PARTIAL_REG_STALL)
20159 nops--;
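/* Prefer a single multiply by 0x01010101 (0x0101010101010101 for DImode)
   when its estimated cost -- multiply setup plus cost per set bit -- is
   no worse than the NOPS shift/or (or insv) steps emitted below. */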
20160 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20161 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20162 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20163 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20164 {
20165 rtx reg = convert_modes (mode, QImode, val, true);
20166 tmp = promote_duplicated_reg (mode, const1_rtx);
20167 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20168 OPTAB_DIRECT);
20169 }
20170 else
20171 {
20172 rtx reg = convert_modes (mode, QImode, val, true);
20173
20174 if (!TARGET_PARTIAL_REG_STALL)
20175 if (mode == SImode)
20176 emit_insn (gen_movsi_insv_1 (reg, reg));
20177 else
20178 emit_insn (gen_movdi_insv_1 (reg, reg));
20179 else
20180 {
20181 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20182 NULL, 1, OPTAB_DIRECT);
20183 reg =
20184 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20185 }
20186 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20187 NULL, 1, OPTAB_DIRECT);
20188 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20189 if (mode == SImode)
20190 return reg;
20191 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20192 NULL, 1, OPTAB_DIRECT);
20193 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20194 return reg;
20195 }
20196 }
20197
20198 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
20199 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
20200 raising alignment from ALIGN to DESIRED_ALIGN. */
20201 static rtx
20202 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20203 {
20204 rtx promoted_val;
20205
20206 if (TARGET_64BIT
20207 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20208 promoted_val = promote_duplicated_reg (DImode, val);
20209 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20210 promoted_val = promote_duplicated_reg (SImode, val);
20211 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20212 promoted_val = promote_duplicated_reg (HImode, val);
20213 else
20214 promoted_val = val;
20215
20216 return promoted_val;
20217 }
20218
20219 /* Expand string set operation (memset, bzero). Use i386 string operations
20220 when profitable. See the ix86_expand_movmem comment for an explanation of
20221 the individual steps performed. */
20222 bool
20223 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20224 rtx expected_align_exp, rtx expected_size_exp)
20225 {
20226 rtx destreg;
20227 rtx label = NULL;
20228 rtx tmp;
20229 rtx jump_around_label = NULL;
20230 HOST_WIDE_INT align = 1;
20231 unsigned HOST_WIDE_INT count = 0;
20232 HOST_WIDE_INT expected_size = -1;
20233 int size_needed = 0, epilogue_size_needed;
20234 int desired_align = 0, align_bytes = 0;
20235 enum stringop_alg alg;
20236 rtx promoted_val = NULL;
20237 bool force_loopy_epilogue = false;
20238 int dynamic_check;
20239 bool need_zero_guard = false;
20240
20241 if (CONST_INT_P (align_exp))
20242 align = INTVAL (align_exp);
20243 /* i386 can do misaligned access at reasonably increased cost. */
20244 if (CONST_INT_P (expected_align_exp)
20245 && INTVAL (expected_align_exp) > align)
20246 align = INTVAL (expected_align_exp);
20247 if (CONST_INT_P (count_exp))
20248 count = expected_size = INTVAL (count_exp);
20249 if (CONST_INT_P (expected_size_exp) && count == 0)
20250 expected_size = INTVAL (expected_size_exp);
20251
20252 /* Make sure we don't need to care about overflow later on. */
20253 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20254 return false;
20255
20256 /* Step 0: Decide on preferred algorithm, desired alignment and
20257 size of chunks to be copied by main loop. */
20258
20259 alg = decide_alg (count, expected_size, true, &dynamic_check);
20260 desired_align = decide_alignment (align, alg, expected_size);
20261
20262 if (!TARGET_ALIGN_STRINGOPS)
20263 align = desired_align;
20264
20265 if (alg == libcall)
20266 return false;
20267 gcc_assert (alg != no_stringop);
20268 if (!count)
20269 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
20270 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20271 switch (alg)
20272 {
20273 case libcall:
20274 case no_stringop:
20275 gcc_unreachable ();
20276 case loop:
20277 need_zero_guard = true;
20278 size_needed = GET_MODE_SIZE (Pmode);
20279 break;
20280 case unrolled_loop:
20281 need_zero_guard = true;
20282 size_needed = GET_MODE_SIZE (Pmode) * 4;
20283 break;
20284 case rep_prefix_8_byte:
20285 size_needed = 8;
20286 break;
20287 case rep_prefix_4_byte:
20288 size_needed = 4;
20289 break;
20290 case rep_prefix_1_byte:
20291 size_needed = 1;
20292 break;
20293 case loop_1_byte:
20294 need_zero_guard = true;
20295 size_needed = 1;
20296 break;
20297 }
20298 epilogue_size_needed = size_needed;
20299
20300 /* Step 1: Prologue guard. */
20301
20302 /* Alignment code needs count to be in register. */
20303 if (CONST_INT_P (count_exp) && desired_align > align)
20304 {
20305 if (INTVAL (count_exp) > desired_align
20306 && INTVAL (count_exp) > size_needed)
20307 {
20308 align_bytes
20309 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20310 if (align_bytes <= 0)
20311 align_bytes = 0;
20312 else
20313 align_bytes = desired_align - align_bytes;
20314 }
20315 if (align_bytes == 0)
20316 {
20317 enum machine_mode mode = SImode;
20318 if (TARGET_64BIT && (count & ~0xffffffff))
20319 mode = DImode;
20320 count_exp = force_reg (mode, count_exp);
20321 }
20322 }
20323 /* Do the cheap promotion to allow better CSE across the
20324 main loop and epilogue (i.e. one load of the big constant in
20325 front of all code). */
20326 if (CONST_INT_P (val_exp))
20327 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20328 desired_align, align);
20329 /* Ensure that alignment prologue won't copy past end of block. */
20330 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20331 {
20332 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20333 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20334 Make sure it is power of 2. */
20335 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20336
20337 /* To improve performance of small blocks, we jump around the VAL
20338 promoting code. This means that if the promoted VAL is not constant,
20339 we might not use it in the epilogue and have to use the byte
20340 loop variant. */
20341 if (epilogue_size_needed > 2 && !promoted_val)
20342 force_loopy_epilogue = true;
20343 if (count)
20344 {
20345 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20346 {
20347 /* If main algorithm works on QImode, no epilogue is needed.
20348 For small sizes just don't align anything. */
20349 if (size_needed == 1)
20350 desired_align = align;
20351 else
20352 goto epilogue;
20353 }
20354 }
20355 else
20356 {
20357 label = gen_label_rtx ();
20358 emit_cmp_and_jump_insns (count_exp,
20359 GEN_INT (epilogue_size_needed),
20360 LTU, 0, counter_mode (count_exp), 1, label);
20361 if (expected_size == -1 || expected_size <= epilogue_size_needed)
20362 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20363 else
20364 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20365 }
20366 }
20367 if (dynamic_check != -1)
20368 {
20369 rtx hot_label = gen_label_rtx ();
20370 jump_around_label = gen_label_rtx ();
20371 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20372 LEU, 0, counter_mode (count_exp), 1, hot_label);
20373 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20374 set_storage_via_libcall (dst, count_exp, val_exp, false);
20375 emit_jump (jump_around_label);
20376 emit_label (hot_label);
20377 }
20378
20379 /* Step 2: Alignment prologue. */
20380
20381 /* Do the expensive promotion once we have branched off the small blocks. */
20382 if (!promoted_val)
20383 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20384 desired_align, align);
20385 gcc_assert (desired_align >= 1 && align >= 1);
20386
20387 if (desired_align > align)
20388 {
20389 if (align_bytes == 0)
20390 {
20391 /* Except for the first move in the epilogue, we no longer know
20392 the constant offset in aliasing info. It doesn't seem worth
20393 the pain to maintain it for the first move, so throw away
20394 the info early. */
20395 dst = change_address (dst, BLKmode, destreg);
20396 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
20397 desired_align);
20398 }
20399 else
20400 {
20401 /* If we know how many bytes need to be stored before dst is
20402 sufficiently aligned, maintain aliasing info accurately. */
20403 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
20404 desired_align, align_bytes);
20405 count_exp = plus_constant (count_exp, -align_bytes);
20406 count -= align_bytes;
20407 }
20408 if (need_zero_guard
20409 && (count < (unsigned HOST_WIDE_INT) size_needed
20410 || (align_bytes == 0
20411 && count < ((unsigned HOST_WIDE_INT) size_needed
20412 + desired_align - align))))
20413 {
20414 /* It is possible that we copied enough so the main loop will not
20415 execute. */
20416 gcc_assert (size_needed > 1);
20417 if (label == NULL_RTX)
20418 label = gen_label_rtx ();
20419 emit_cmp_and_jump_insns (count_exp,
20420 GEN_INT (size_needed),
20421 LTU, 0, counter_mode (count_exp), 1, label);
20422 if (expected_size == -1
20423 || expected_size < (desired_align - align) / 2 + size_needed)
20424 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20425 else
20426 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20427 }
20428 }
20429 if (label && size_needed == 1)
20430 {
20431 emit_label (label);
20432 LABEL_NUSES (label) = 1;
20433 label = NULL;
20434 promoted_val = val_exp;
20435 epilogue_size_needed = 1;
20436 }
20437 else if (label == NULL_RTX)
20438 epilogue_size_needed = size_needed;
20439
20440 /* Step 3: Main loop. */
20441
20442 switch (alg)
20443 {
20444 case libcall:
20445 case no_stringop:
20446 gcc_unreachable ();
20447 case loop_1_byte:
20448 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20449 count_exp, QImode, 1, expected_size);
20450 break;
20451 case loop:
20452 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20453 count_exp, Pmode, 1, expected_size);
20454 break;
20455 case unrolled_loop:
20456 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20457 count_exp, Pmode, 4, expected_size);
20458 break;
20459 case rep_prefix_8_byte:
20460 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20461 DImode, val_exp);
20462 break;
20463 case rep_prefix_4_byte:
20464 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20465 SImode, val_exp);
20466 break;
20467 case rep_prefix_1_byte:
20468 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20469 QImode, val_exp);
20470 break;
20471 }
20472 /* Properly adjust the offset of dest memory for aliasing. */
20473 if (CONST_INT_P (count_exp))
20474 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20475 (count / size_needed) * size_needed);
20476 else
20477 dst = change_address (dst, BLKmode, destreg);
20478
20479 /* Step 4: Epilogue to copy the remaining bytes. */
20480
20481 if (label)
20482 {
20483 /* When the main loop is done, COUNT_EXP might hold original count,
20484 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20485 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20486 bytes. Compensate if needed. */
20487
20488 if (size_needed < epilogue_size_needed)
20489 {
20490 tmp =
20491 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20492 GEN_INT (size_needed - 1), count_exp, 1,
20493 OPTAB_DIRECT);
20494 if (tmp != count_exp)
20495 emit_move_insn (count_exp, tmp);
20496 }
20497 emit_label (label);
20498 LABEL_NUSES (label) = 1;
20499 }
20500 epilogue:
20501 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20502 {
20503 if (force_loopy_epilogue)
20504 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
20505 epilogue_size_needed);
20506 else
20507 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
20508 epilogue_size_needed);
20509 }
20510 if (jump_around_label)
20511 emit_label (jump_around_label);
20512 return true;
20513 }
20514
20515 /* Expand the appropriate insns for doing strlen if not just doing
20516 repnz; scasb
20517
20518 out = result, initialized with the start address
20519 align_rtx = alignment of the address.
20520 scratch = scratch register, initialized with the start address when
20521 not aligned, otherwise undefined
20522
20523 This is just the body. It needs the initializations mentioned above and
20524 some address computing at the end. These things are done in i386.md. */
20525
20526 static void
20527 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
20528 {
20529 int align;
20530 rtx tmp;
20531 rtx align_2_label = NULL_RTX;
20532 rtx align_3_label = NULL_RTX;
20533 rtx align_4_label = gen_label_rtx ();
20534 rtx end_0_label = gen_label_rtx ();
20535 rtx mem;
20536 rtx tmpreg = gen_reg_rtx (SImode);
20537 rtx scratch = gen_reg_rtx (SImode);
20538 rtx cmp;
20539
20540 align = 0;
20541 if (CONST_INT_P (align_rtx))
20542 align = INTVAL (align_rtx);
20543
20544 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
20545
20546 /* Is there a known alignment and is it less than 4? */
20547 if (align < 4)
20548 {
20549 rtx scratch1 = gen_reg_rtx (Pmode);
20550 emit_move_insn (scratch1, out);
20551 /* Is there a known alignment and is it not 2? */
20552 if (align != 2)
20553 {
20554 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
20555 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
20556
20557 /* Leave just the 3 lower bits. */
20558 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
20559 NULL_RTX, 0, OPTAB_WIDEN);
20560
20561 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20562 Pmode, 1, align_4_label);
20563 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
20564 Pmode, 1, align_2_label);
20565 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
20566 Pmode, 1, align_3_label);
20567 }
20568 else
20569 {
20570 /* Since the alignment is 2, we have to check 2 or 0 bytes;
20571 check if it is aligned to a 4-byte boundary. */
20572
20573 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
20574 NULL_RTX, 0, OPTAB_WIDEN);
20575
20576 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20577 Pmode, 1, align_4_label);
20578 }
20579
20580 mem = change_address (src, QImode, out);
20581
20582 /* Now compare the bytes. */
20583
20584 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
20585 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
20586 QImode, 1, end_0_label);
20587
20588 /* Increment the address. */
20589 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20590
20591 /* Not needed with an alignment of 2 */
20592 if (align != 2)
20593 {
20594 emit_label (align_2_label);
20595
20596 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20597 end_0_label);
20598
20599 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20600
20601 emit_label (align_3_label);
20602 }
20603
20604 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20605 end_0_label);
20606
20607 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20608 }
20609
20610 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
20611 align this loop; it only makes the program larger and does not speed
20612 it up. */
20613 emit_label (align_4_label);
20614
20615 mem = change_address (src, SImode, out);
20616 emit_move_insn (scratch, mem);
20617 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
20618
20619 /* This formula yields a nonzero result iff one of the bytes is zero.
20620 This saves three branches inside the loop and many cycles. */
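/* Concretely, the sequence below computes
   (SCRATCH - 0x01010101) & ~SCRATCH & 0x80808080.
   For SCRATCH == 0x12003456 this gives 0x00800000, flagging the zero
   byte; for SCRATCH == 0x01010101 (no zero byte) the result is 0. */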
20621
20622 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
20623 emit_insn (gen_one_cmplsi2 (scratch, scratch));
20624 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
20625 emit_insn (gen_andsi3 (tmpreg, tmpreg,
20626 gen_int_mode (0x80808080, SImode)));
20627 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
20628 align_4_label);
20629
20630 if (TARGET_CMOVE)
20631 {
20632 rtx reg = gen_reg_rtx (SImode);
20633 rtx reg2 = gen_reg_rtx (Pmode);
20634 emit_move_insn (reg, tmpreg);
20635 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
20636
20637 /* If zero is not in the first two bytes, move two bytes forward. */
20638 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20639 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20640 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20641 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
20642 gen_rtx_IF_THEN_ELSE (SImode, tmp,
20643 reg,
20644 tmpreg)));
20645 /* Emit lea manually to avoid clobbering of flags. */
20646 emit_insn (gen_rtx_SET (SImode, reg2,
20647 gen_rtx_PLUS (Pmode, out, const2_rtx)));
20648
20649 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20650 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20651 emit_insn (gen_rtx_SET (VOIDmode, out,
20652 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
20653 reg2,
20654 out)));
20655 }
20656 else
20657 {
20658 rtx end_2_label = gen_label_rtx ();
20659 /* Is zero in the first two bytes? */
20660
20661 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20662 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20663 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
20664 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20665 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
20666 pc_rtx);
20667 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20668 JUMP_LABEL (tmp) = end_2_label;
20669
20670 /* Not in the first two. Move two bytes forward. */
20671 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
20672 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
20673
20674 emit_label (end_2_label);
20675
20676 }
20677
20678 /* Avoid branch in fixing the byte. */
20679 tmpreg = gen_lowpart (QImode, tmpreg);
20680 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
20681 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
20682 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
20683 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
20684
20685 emit_label (end_0_label);
20686 }
20687
20688 /* Expand strlen. */
20689
20690 bool
20691 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
20692 {
20693 rtx addr, scratch1, scratch2, scratch3, scratch4;
20694
20695 /* The generic case of the strlen expander is long. Avoid expanding
20696 it unless TARGET_INLINE_ALL_STRINGOPS. */
20697
20698 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20699 && !TARGET_INLINE_ALL_STRINGOPS
20700 && !optimize_insn_for_size_p ()
20701 && (!CONST_INT_P (align) || INTVAL (align) < 4))
20702 return false;
20703
20704 addr = force_reg (Pmode, XEXP (src, 0));
20705 scratch1 = gen_reg_rtx (Pmode);
20706
20707 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20708 && !optimize_insn_for_size_p ())
20709 {
20710 /* Well it seems that some optimizer does not combine a call like
20711 foo(strlen(bar), strlen(bar));
20712 when the move and the subtraction are done here. It does calculate
20713 the length just once when these instructions are done inside of
20714 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
20715 often used and I use one fewer register for the lifetime of
20716 output_strlen_unroll() this is better. */
20717
20718 emit_move_insn (out, addr);
20719
20720 ix86_expand_strlensi_unroll_1 (out, src, align);
20721
20722 /* strlensi_unroll_1 returns the address of the zero at the end of
20723 the string, like memchr(), so compute the length by subtracting
20724 the start address. */
20725 emit_insn (ix86_gen_sub3 (out, out, addr));
20726 }
20727 else
20728 {
20729 rtx unspec;
20730
20731 /* Can't use this if the user has appropriated eax, ecx, or edi. */
20732 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
20733 return false;
20734
20735 scratch2 = gen_reg_rtx (Pmode);
20736 scratch3 = gen_reg_rtx (Pmode);
20737 scratch4 = force_reg (Pmode, constm1_rtx);
20738
20739 emit_move_insn (scratch3, addr);
20740 eoschar = force_reg (QImode, eoschar);
20741
20742 src = replace_equiv_address_nv (src, scratch3);
20743
20744 /* If .md starts supporting :P, this can be done in .md. */
20745 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
20746 scratch4), UNSPEC_SCAS);
20747 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
20748 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
20749 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
20750 }
20751 return true;
20752 }
20753
20754 /* For a given symbol (function), construct code to compute the address of its
20755 PLT entry in the large x86-64 PIC model. */
20756 rtx
20757 construct_plt_address (rtx symbol)
20758 {
20759 rtx tmp = gen_reg_rtx (Pmode);
20760 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
20761
20762 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
20763 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
20764
20765 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
20766 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
20767 return tmp;
20768 }
20769
20770 rtx
20771 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20772 rtx callarg2,
20773 rtx pop, int sibcall)
20774 {
20775 rtx use = NULL, call;
20776
20777 if (pop == const0_rtx)
20778 pop = NULL;
20779 gcc_assert (!TARGET_64BIT || !pop);
20780
20781 if (TARGET_MACHO && !TARGET_64BIT)
20782 {
20783 #if TARGET_MACHO
20784 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20785 fnaddr = machopic_indirect_call_target (fnaddr);
20786 #endif
20787 }
20788 else
20789 {
20790 /* Static functions and indirect calls don't need the pic register. */
20791 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20792 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20793 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20794 use_reg (&use, pic_offset_table_rtx);
20795 }
20796
20797 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20798 {
20799 rtx al = gen_rtx_REG (QImode, AX_REG);
20800 emit_move_insn (al, callarg2);
20801 use_reg (&use, al);
20802 }
20803
20804 if (ix86_cmodel == CM_LARGE_PIC
20805 && MEM_P (fnaddr)
20806 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20807 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20808 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20809 else if (sibcall
20810 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20811 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20812 {
20813 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20814 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20815 }
20816
20817 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20818 if (retval)
20819 call = gen_rtx_SET (VOIDmode, retval, call);
20820 if (pop)
20821 {
20822 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20823 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20824 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20825 }
20826 if (TARGET_64BIT
20827 && ix86_cfun_abi () == MS_ABI
20828 && (!callarg2 || INTVAL (callarg2) != -2))
20829 {
20830 /* We need to represent that SI and DI registers are clobbered
20831 by SYSV calls. */
20832 static int clobbered_registers[] = {
20833 XMM6_REG, XMM7_REG, XMM8_REG,
20834 XMM9_REG, XMM10_REG, XMM11_REG,
20835 XMM12_REG, XMM13_REG, XMM14_REG,
20836 XMM15_REG, SI_REG, DI_REG
20837 };
20838 unsigned int i;
20839 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20840 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20841 UNSPEC_MS_TO_SYSV_CALL);
20842
20843 vec[0] = call;
20844 vec[1] = unspec;
20845 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20846 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20847 ? TImode : DImode,
20848 gen_rtx_REG
20849 (SSE_REGNO_P (clobbered_registers[i])
20850 ? TImode : DImode,
20851 clobbered_registers[i]));
20852
20853 call = gen_rtx_PARALLEL (VOIDmode,
20854 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20855 + 2, vec));
20856 }
20857
20858 call = emit_call_insn (call);
20859 if (use)
20860 CALL_INSN_FUNCTION_USAGE (call) = use;
20861
20862 return call;
20863 }
20864
20865 \f
20866 /* Clear stack slot assignments remembered from previous functions.
20867 This is called from INIT_EXPANDERS once before RTL is emitted for each
20868 function. */
20869
20870 static struct machine_function *
20871 ix86_init_machine_status (void)
20872 {
20873 struct machine_function *f;
20874
20875 f = ggc_alloc_cleared_machine_function ();
20876 f->use_fast_prologue_epilogue_nregs = -1;
20877 f->tls_descriptor_call_expanded_p = 0;
20878 f->call_abi = ix86_abi;
20879
20880 return f;
20881 }
20882
20883 /* Return a MEM corresponding to a stack slot with mode MODE.
20884 Allocate a new slot if necessary.
20885
20886 The RTL for a function can have several slots available: N is
20887 which slot to use. */
20888
20889 rtx
20890 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20891 {
20892 struct stack_local_entry *s;
20893
20894 gcc_assert (n < MAX_386_STACK_LOCALS);
20895
20896 /* Virtual slot is valid only before vregs are instantiated. */
20897 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20898
20899 for (s = ix86_stack_locals; s; s = s->next)
20900 if (s->mode == mode && s->n == n)
20901 return copy_rtx (s->rtl);
20902
20903 s = ggc_alloc_stack_local_entry ();
20904 s->n = n;
20905 s->mode = mode;
20906 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20907
20908 s->next = ix86_stack_locals;
20909 ix86_stack_locals = s;
20910 return s->rtl;
20911 }
20912
20913 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20914
20915 static GTY(()) rtx ix86_tls_symbol;
20916 rtx
20917 ix86_tls_get_addr (void)
20918 {
20919
20920 if (!ix86_tls_symbol)
20921 {
20922 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20923 (TARGET_ANY_GNU_TLS
20924 && !TARGET_64BIT)
20925 ? "___tls_get_addr"
20926 : "__tls_get_addr");
20927 }
20928
20929 return ix86_tls_symbol;
20930 }
20931
20932 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20933
20934 static GTY(()) rtx ix86_tls_module_base_symbol;
20935 rtx
20936 ix86_tls_module_base (void)
20937 {
20938
20939 if (!ix86_tls_module_base_symbol)
20940 {
20941 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20942 "_TLS_MODULE_BASE_");
20943 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20944 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20945 }
20946
20947 return ix86_tls_module_base_symbol;
20948 }
20949 \f
20950 /* Calculate the length of the memory address in the instruction
20951 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20952
20953 int
20954 memory_address_length (rtx addr)
20955 {
20956 struct ix86_address parts;
20957 rtx base, index, disp;
20958 int len;
20959 int ok;
20960
20961 if (GET_CODE (addr) == PRE_DEC
20962 || GET_CODE (addr) == POST_INC
20963 || GET_CODE (addr) == PRE_MODIFY
20964 || GET_CODE (addr) == POST_MODIFY)
20965 return 0;
20966
20967 ok = ix86_decompose_address (addr, &parts);
20968 gcc_assert (ok);
20969
20970 if (parts.base && GET_CODE (parts.base) == SUBREG)
20971 parts.base = SUBREG_REG (parts.base);
20972 if (parts.index && GET_CODE (parts.index) == SUBREG)
20973 parts.index = SUBREG_REG (parts.index);
20974
20975 base = parts.base;
20976 index = parts.index;
20977 disp = parts.disp;
20978 len = 0;
20979
20980 /* Rule of thumb:
20981 - esp as the base always wants an index,
20982 - ebp as the base always wants a displacement,
20983 - r12 as the base always wants an index,
20984 - r13 as the base always wants a displacement. */
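/* For example, a bare (%esp) needs a SIB byte, giving length 1; 4(%ebx)
   needs a disp8, also length 1; a plain disp32 address is length 4, plus
   1 more in 64-bit mode when the operand cannot use the %rip-relative
   form and a SIB byte must be emitted. */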
20985
20986 /* Register Indirect. */
20987 if (base && !index && !disp)
20988 {
20989 /* esp (for its index) and ebp (for its displacement) need
20990 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
20991 code. */
20992 if (REG_P (addr)
20993 && (addr == arg_pointer_rtx
20994 || addr == frame_pointer_rtx
20995 || REGNO (addr) == SP_REG
20996 || REGNO (addr) == BP_REG
20997 || REGNO (addr) == R12_REG
20998 || REGNO (addr) == R13_REG))
20999 len = 1;
21000 }
21001
21002 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
21003 is not disp32, but disp32(%rip), so for disp32
21004 SIB byte is needed, unless print_operand_address
21005 optimizes it into disp32(%rip) or (%rip) is implied
21006 by UNSPEC. */
21007 else if (disp && !base && !index)
21008 {
21009 len = 4;
21010 if (TARGET_64BIT)
21011 {
21012 rtx symbol = disp;
21013
21014 if (GET_CODE (disp) == CONST)
21015 symbol = XEXP (disp, 0);
21016 if (GET_CODE (symbol) == PLUS
21017 && CONST_INT_P (XEXP (symbol, 1)))
21018 symbol = XEXP (symbol, 0);
21019
21020 if (GET_CODE (symbol) != LABEL_REF
21021 && (GET_CODE (symbol) != SYMBOL_REF
21022 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21023 && (GET_CODE (symbol) != UNSPEC
21024 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21025 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21026 len += 1;
21027 }
21028 }
21029
21030 else
21031 {
21032 /* Find the length of the displacement constant. */
21033 if (disp)
21034 {
21035 if (base && satisfies_constraint_K (disp))
21036 len = 1;
21037 else
21038 len = 4;
21039 }
21040 /* ebp always wants a displacement. Similarly r13. */
21041 else if (base && REG_P (base)
21042 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21043 len = 1;
21044
21045 /* An index requires the two-byte modrm form.... */
21046 if (index
21047 /* ...like esp (or r12), which always wants an index. */
21048 || base == arg_pointer_rtx
21049 || base == frame_pointer_rtx
21050 || (base && REG_P (base)
21051 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
21052 len += 1;
21053 }
21054
21055 switch (parts.seg)
21056 {
21057 case SEG_FS:
21058 case SEG_GS:
21059 len += 1;
21060 break;
21061 default:
21062 break;
21063 }
21064
21065 return len;
21066 }
21067
21068 /* Compute default value for "length_immediate" attribute. When SHORTFORM
21069 is set, expect that the insn has an 8-bit immediate alternative. */
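/* For example, with SHORTFORM set an add of $5 fits a sign-extended
   8-bit immediate, so length_immediate is 1, while an add of $300 in
   SImode needs the full 4-byte immediate. */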
21070 int
21071 ix86_attr_length_immediate_default (rtx insn, int shortform)
21072 {
21073 int len = 0;
21074 int i;
21075 extract_insn_cached (insn);
21076 for (i = recog_data.n_operands - 1; i >= 0; --i)
21077 if (CONSTANT_P (recog_data.operand[i]))
21078 {
21079 enum attr_mode mode = get_attr_mode (insn);
21080
21081 gcc_assert (!len);
21082 if (shortform && CONST_INT_P (recog_data.operand[i]))
21083 {
21084 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21085 switch (mode)
21086 {
21087 case MODE_QI:
21088 len = 1;
21089 continue;
21090 case MODE_HI:
21091 ival = trunc_int_for_mode (ival, HImode);
21092 break;
21093 case MODE_SI:
21094 ival = trunc_int_for_mode (ival, SImode);
21095 break;
21096 default:
21097 break;
21098 }
21099 if (IN_RANGE (ival, -128, 127))
21100 {
21101 len = 1;
21102 continue;
21103 }
21104 }
21105 switch (mode)
21106 {
21107 case MODE_QI:
21108 len = 1;
21109 break;
21110 case MODE_HI:
21111 len = 2;
21112 break;
21113 case MODE_SI:
21114 len = 4;
21115 break;
21116 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
21117 case MODE_DI:
21118 len = 4;
21119 break;
21120 default:
21121 fatal_insn ("unknown insn mode", insn);
21122 }
21123 }
21124 return len;
21125 }
21126 /* Compute default value for "length_address" attribute. */
21127 int
21128 ix86_attr_length_address_default (rtx insn)
21129 {
21130 int i;
21131
21132 if (get_attr_type (insn) == TYPE_LEA)
21133 {
21134 rtx set = PATTERN (insn), addr;
21135
21136 if (GET_CODE (set) == PARALLEL)
21137 set = XVECEXP (set, 0, 0);
21138
21139 gcc_assert (GET_CODE (set) == SET);
21140
21141 addr = SET_SRC (set);
21142 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21143 {
21144 if (GET_CODE (addr) == ZERO_EXTEND)
21145 addr = XEXP (addr, 0);
21146 if (GET_CODE (addr) == SUBREG)
21147 addr = SUBREG_REG (addr);
21148 }
21149
21150 return memory_address_length (addr);
21151 }
21152
21153 extract_insn_cached (insn);
21154 for (i = recog_data.n_operands - 1; i >= 0; --i)
21155 if (MEM_P (recog_data.operand[i]))
21156 {
21157 constrain_operands_cached (reload_completed);
21158 if (which_alternative != -1)
21159 {
21160 const char *constraints = recog_data.constraints[i];
21161 int alt = which_alternative;
21162
21163 while (*constraints == '=' || *constraints == '+')
21164 constraints++;
21165 while (alt-- > 0)
21166 while (*constraints++ != ',')
21167 ;
21168 /* Skip ignored operands. */
21169 if (*constraints == 'X')
21170 continue;
21171 }
21172 return memory_address_length (XEXP (recog_data.operand[i], 0));
21173 }
21174 return 0;
21175 }
21176
21177 /* Compute default value for "length_vex" attribute. It includes
21178 2 or 3 byte VEX prefix and 1 opcode byte. */
21179
21180 int
21181 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
21182 int has_vex_w)
21183 {
21184 int i;
21185
21186 /* Only the 0f opcode can use the 2 byte VEX prefix, and the VEX W bit
21187 requires the 3 byte VEX prefix. */
21188 if (!has_0f_opcode || has_vex_w)
21189 return 3 + 1;
21190
21191 /* We can always use 2 byte VEX prefix in 32bit. */
21192 if (!TARGET_64BIT)
21193 return 2 + 1;
21194
21195 extract_insn_cached (insn);
21196
21197 for (i = recog_data.n_operands - 1; i >= 0; --i)
21198 if (REG_P (recog_data.operand[i]))
21199 {
21200 /* REX.W bit uses 3 byte VEX prefix. */
21201 if (GET_MODE (recog_data.operand[i]) == DImode
21202 && GENERAL_REG_P (recog_data.operand[i]))
21203 return 3 + 1;
21204 }
21205 else
21206 {
21207 /* REX.X or REX.B bits use 3 byte VEX prefix. */
21208 if (MEM_P (recog_data.operand[i])
21209 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
21210 return 3 + 1;
21211 }
21212
21213 return 2 + 1;
21214 }
21215 \f
21216 /* Return the maximum number of instructions a cpu can issue. */
21217
21218 static int
21219 ix86_issue_rate (void)
21220 {
21221 switch (ix86_tune)
21222 {
21223 case PROCESSOR_PENTIUM:
21224 case PROCESSOR_ATOM:
21225 case PROCESSOR_K6:
21226 return 2;
21227
21228 case PROCESSOR_PENTIUMPRO:
21229 case PROCESSOR_PENTIUM4:
21230 case PROCESSOR_ATHLON:
21231 case PROCESSOR_K8:
21232 case PROCESSOR_AMDFAM10:
21233 case PROCESSOR_NOCONA:
21234 case PROCESSOR_GENERIC32:
21235 case PROCESSOR_GENERIC64:
21236 case PROCESSOR_BDVER1:
21237 return 3;
21238
21239 case PROCESSOR_CORE2:
21240 return 4;
21241
21242 default:
21243 return 1;
21244 }
21245 }
21246
21247 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
21248 by DEP_INSN and nothing else set by DEP_INSN. */
21249
21250 static int
21251 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
21252 {
21253 rtx set, set2;
21254
21255 /* Simplify the test for uninteresting insns. */
21256 if (insn_type != TYPE_SETCC
21257 && insn_type != TYPE_ICMOV
21258 && insn_type != TYPE_FCMOV
21259 && insn_type != TYPE_IBR)
21260 return 0;
21261
21262 if ((set = single_set (dep_insn)) != 0)
21263 {
21264 set = SET_DEST (set);
21265 set2 = NULL_RTX;
21266 }
21267 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
21268 && XVECLEN (PATTERN (dep_insn), 0) == 2
21269 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
21270 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
21271 {
21272 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
21273 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
21274 }
21275 else
21276 return 0;
21277
21278 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
21279 return 0;
21280
21281 /* This test is true if the dependent insn reads the flags but
21282 not any other potentially set register. */
21283 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
21284 return 0;
21285
21286 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
21287 return 0;
21288
21289 return 1;
21290 }
21291
21292 /* Return true iff USE_INSN has a memory address with operands set by
21293 SET_INSN. */
21294
21295 bool
21296 ix86_agi_dependent (rtx set_insn, rtx use_insn)
21297 {
21298 int i;
21299 extract_insn_cached (use_insn);
21300 for (i = recog_data.n_operands - 1; i >= 0; --i)
21301 if (MEM_P (recog_data.operand[i]))
21302 {
21303 rtx addr = XEXP (recog_data.operand[i], 0);
21304 return modified_in_p (addr, set_insn) != 0;
21305 }
21306 return false;
21307 }
21308
21309 static int
21310 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
21311 {
21312 enum attr_type insn_type, dep_insn_type;
21313 enum attr_memory memory;
21314 rtx set, set2;
21315 int dep_insn_code_number;
21316
21317 /* Anti and output dependencies have zero cost on all CPUs. */
21318 if (REG_NOTE_KIND (link) != 0)
21319 return 0;
21320
21321 dep_insn_code_number = recog_memoized (dep_insn);
21322
21323 /* If we can't recognize the insns, we can't really do anything. */
21324 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
21325 return cost;
21326
21327 insn_type = get_attr_type (insn);
21328 dep_insn_type = get_attr_type (dep_insn);
21329
21330 switch (ix86_tune)
21331 {
21332 case PROCESSOR_PENTIUM:
21333 /* Address Generation Interlock adds a cycle of latency. */
21334 if (insn_type == TYPE_LEA)
21335 {
21336 rtx addr = PATTERN (insn);
21337
21338 if (GET_CODE (addr) == PARALLEL)
21339 addr = XVECEXP (addr, 0, 0);
21340
21341 gcc_assert (GET_CODE (addr) == SET);
21342
21343 addr = SET_SRC (addr);
21344 if (modified_in_p (addr, dep_insn))
21345 cost += 1;
21346 }
21347 else if (ix86_agi_dependent (dep_insn, insn))
21348 cost += 1;
21349
21350 /* ??? Compares pair with jump/setcc. */
21351 if (ix86_flags_dependent (insn, dep_insn, insn_type))
21352 cost = 0;
21353
21354 /* Floating point stores require value to be ready one cycle earlier. */
21355 if (insn_type == TYPE_FMOV
21356 && get_attr_memory (insn) == MEMORY_STORE
21357 && !ix86_agi_dependent (dep_insn, insn))
21358 cost += 1;
21359 break;
21360
21361 case PROCESSOR_PENTIUMPRO:
21362 memory = get_attr_memory (insn);
21363
21364 /* INT->FP conversion is expensive. */
21365 if (get_attr_fp_int_src (dep_insn))
21366 cost += 5;
21367
21368 /* There is one cycle extra latency between an FP op and a store. */
21369 if (insn_type == TYPE_FMOV
21370 && (set = single_set (dep_insn)) != NULL_RTX
21371 && (set2 = single_set (insn)) != NULL_RTX
21372 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
21373 && MEM_P (SET_DEST (set2)))
21374 cost += 1;
21375
21376 /* Show ability of reorder buffer to hide latency of load by executing
21377 in parallel with previous instruction in case
21378 previous instruction is not needed to compute the address. */
21379 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21380 && !ix86_agi_dependent (dep_insn, insn))
21381 {
21382 /* Claim moves to take one cycle, as the core can issue one load
21383 at a time and the next load can start a cycle later. */
21384 if (dep_insn_type == TYPE_IMOV
21385 || dep_insn_type == TYPE_FMOV)
21386 cost = 1;
21387 else if (cost > 1)
21388 cost--;
21389 }
21390 break;
21391
21392 case PROCESSOR_K6:
21393 memory = get_attr_memory (insn);
21394
21395 /* The esp dependency is resolved before the instruction is really
21396 finished. */
21397 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
21398 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
21399 return 1;
21400
21401 /* INT->FP conversion is expensive. */
21402 if (get_attr_fp_int_src (dep_insn))
21403 cost += 5;
21404
21405 /* Show ability of reorder buffer to hide latency of load by executing
21406 in parallel with previous instruction in case
21407 previous instruction is not needed to compute the address. */
21408 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21409 && !ix86_agi_dependent (dep_insn, insn))
21410 {
21411 /* Claim moves to take one cycle, as the core can issue one load
21412 at a time and the next load can start a cycle later. */
21413 if (dep_insn_type == TYPE_IMOV
21414 || dep_insn_type == TYPE_FMOV)
21415 cost = 1;
21416 else if (cost > 2)
21417 cost -= 2;
21418 else
21419 cost = 1;
21420 }
21421 break;
21422
21423 case PROCESSOR_ATHLON:
21424 case PROCESSOR_K8:
21425 case PROCESSOR_AMDFAM10:
21426 case PROCESSOR_BDVER1:
21427 case PROCESSOR_ATOM:
21428 case PROCESSOR_GENERIC32:
21429 case PROCESSOR_GENERIC64:
21430 memory = get_attr_memory (insn);
21431
21432 /* Show ability of reorder buffer to hide latency of load by executing
21433 in parallel with previous instruction in case
21434 previous instruction is not needed to compute the address. */
21435 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21436 && !ix86_agi_dependent (dep_insn, insn))
21437 {
21438 enum attr_unit unit = get_attr_unit (insn);
21439 int loadcost = 3;
21440
21441 /* Because of the difference between the length of integer and
21442 floating unit pipeline preparation stages, the memory operands
21443 for floating point are cheaper.
21444
21445 ??? For Athlon the difference is most probably 2. */
21446 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
21447 loadcost = 3;
21448 else
21449 loadcost = TARGET_ATHLON ? 2 : 0;
21450
21451 if (cost >= loadcost)
21452 cost -= loadcost;
21453 else
21454 cost = 0;
21455 }
21456
21457 default:
21458 break;
21459 }
21460
21461 return cost;
21462 }
21463
21464 /* How many alternative schedules to try. This should be as wide as the
21465 scheduling freedom in the DFA, but no wider. Making this value too
21466 large results in extra work for the scheduler. */
21467
21468 static int
21469 ia32_multipass_dfa_lookahead (void)
21470 {
21471 switch (ix86_tune)
21472 {
21473 case PROCESSOR_PENTIUM:
21474 return 2;
21475
21476 case PROCESSOR_PENTIUMPRO:
21477 case PROCESSOR_K6:
21478 return 1;
21479
21480 default:
21481 return 0;
21482 }
21483 }
21484
21485 \f
21486 /* Compute the alignment given to a constant that is being placed in memory.
21487 EXP is the constant and ALIGN is the alignment that the object would
21488 ordinarily have.
21489 The value of this function is used instead of that alignment to align
21490 the object. */
21491
21492 int
21493 ix86_constant_alignment (tree exp, int align)
21494 {
21495 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
21496 || TREE_CODE (exp) == INTEGER_CST)
21497 {
21498 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
21499 return 64;
21500 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
21501 return 128;
21502 }
21503 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
21504 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
21505 return BITS_PER_WORD;
21506
21507 return align;
21508 }
21509
21510 /* Compute the alignment for a static variable.
21511 TYPE is the data type, and ALIGN is the alignment that
21512 the object would ordinarily have. The value of this function is used
21513 instead of that alignment to align the object. */
21514
21515 int
21516 ix86_data_alignment (tree type, int align)
21517 {
21518 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
21519
21520 if (AGGREGATE_TYPE_P (type)
21521 && TYPE_SIZE (type)
21522 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21523 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
21524 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
21525 && align < max_align)
21526 align = max_align;
21527
21528 /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
21529 to a 16-byte boundary. */
21530 if (TARGET_64BIT)
21531 {
21532 if (AGGREGATE_TYPE_P (type)
21533 && TYPE_SIZE (type)
21534 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21535 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
21536 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21537 return 128;
21538 }
21539
21540 if (TREE_CODE (type) == ARRAY_TYPE)
21541 {
21542 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21543 return 64;
21544 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21545 return 128;
21546 }
21547 else if (TREE_CODE (type) == COMPLEX_TYPE)
21548 {
21549
21550 if (TYPE_MODE (type) == DCmode && align < 64)
21551 return 64;
21552 if ((TYPE_MODE (type) == XCmode
21553 || TYPE_MODE (type) == TCmode) && align < 128)
21554 return 128;
21555 }
21556 else if ((TREE_CODE (type) == RECORD_TYPE
21557 || TREE_CODE (type) == UNION_TYPE
21558 || TREE_CODE (type) == QUAL_UNION_TYPE)
21559 && TYPE_FIELDS (type))
21560 {
21561 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21562 return 64;
21563 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21564 return 128;
21565 }
21566 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21567 || TREE_CODE (type) == INTEGER_TYPE)
21568 {
21569 if (TYPE_MODE (type) == DFmode && align < 64)
21570 return 64;
21571 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21572 return 128;
21573 }
21574
21575 return align;
21576 }
21577
21578 /* Compute the alignment for a local variable or a stack slot. EXP is
21579 the data type or decl itself, MODE is the widest mode available and
21580 ALIGN is the alignment that the object would ordinarily have. The
21581 value of this macro is used instead of that alignment to align the
21582 object. */
21583
21584 unsigned int
21585 ix86_local_alignment (tree exp, enum machine_mode mode,
21586 unsigned int align)
21587 {
21588 tree type, decl;
21589
21590 if (exp && DECL_P (exp))
21591 {
21592 type = TREE_TYPE (exp);
21593 decl = exp;
21594 }
21595 else
21596 {
21597 type = exp;
21598 decl = NULL;
21599 }
21600
21601 /* Don't do dynamic stack realignment for long long objects with
21602 -mpreferred-stack-boundary=2. */
21603 if (!TARGET_64BIT
21604 && align == 64
21605 && ix86_preferred_stack_boundary < 64
21606 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
21607 && (!type || !TYPE_USER_ALIGN (type))
21608 && (!decl || !DECL_USER_ALIGN (decl)))
21609 align = 32;
21610
21611 /* If TYPE is NULL, we are allocating a stack slot for caller-save
21612 register in MODE. We will return the largest alignment of XF
21613 and DF. */
21614 if (!type)
21615 {
21616 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
21617 align = GET_MODE_ALIGNMENT (DFmode);
21618 return align;
21619 }
21620
21621 /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
21622 to a 16-byte boundary. Exact wording is:
21623
21624 An array uses the same alignment as its elements, except that a local or
21625 global array variable of length at least 16 bytes or
21626 a C99 variable-length array variable always has alignment of at least 16 bytes.
21627
21628 This was added to allow use of aligned SSE instructions on arrays. This
21629 rule is meant for static storage (where the compiler cannot do the analysis
21630 by itself). We follow it for automatic variables only when convenient.
21631 We fully control everything in the function being compiled, and functions
21632 from other units cannot rely on the alignment.
21633
21634 Exclude the va_list type. It is the common case of a local array where
21635 we cannot benefit from the alignment. */
21636 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
21637 && TARGET_SSE)
21638 {
21639 if (AGGREGATE_TYPE_P (type)
21640 && (TYPE_MAIN_VARIANT (type)
21641 != TYPE_MAIN_VARIANT (va_list_type_node))
21642 && TYPE_SIZE (type)
21643 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21644 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
21645 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21646 return 128;
21647 }
21648 if (TREE_CODE (type) == ARRAY_TYPE)
21649 {
21650 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21651 return 64;
21652 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21653 return 128;
21654 }
21655 else if (TREE_CODE (type) == COMPLEX_TYPE)
21656 {
21657 if (TYPE_MODE (type) == DCmode && align < 64)
21658 return 64;
21659 if ((TYPE_MODE (type) == XCmode
21660 || TYPE_MODE (type) == TCmode) && align < 128)
21661 return 128;
21662 }
21663 else if ((TREE_CODE (type) == RECORD_TYPE
21664 || TREE_CODE (type) == UNION_TYPE
21665 || TREE_CODE (type) == QUAL_UNION_TYPE)
21666 && TYPE_FIELDS (type))
21667 {
21668 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21669 return 64;
21670 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21671 return 128;
21672 }
21673 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21674 || TREE_CODE (type) == INTEGER_TYPE)
21675 {
21676
21677 if (TYPE_MODE (type) == DFmode && align < 64)
21678 return 64;
21679 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21680 return 128;
21681 }
21682 return align;
21683 }
21684
21685 /* Compute the minimum required alignment for dynamic stack realignment
21686 purposes for a local variable, parameter or a stack slot. EXP is
21687 the data type or decl itself, MODE is its mode and ALIGN is the
21688 alignment that the object would ordinarily have. */
21689
21690 unsigned int
21691 ix86_minimum_alignment (tree exp, enum machine_mode mode,
21692 unsigned int align)
21693 {
21694 tree type, decl;
21695
21696 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
21697 return align;
21698
21699 if (exp && DECL_P (exp))
21700 {
21701 type = TREE_TYPE (exp);
21702 decl = exp;
21703 }
21704 else
21705 {
21706 type = exp;
21707 decl = NULL;
21708 }
21709
21710 /* Don't do dynamic stack realignment for long long objects with
21711 -mpreferred-stack-boundary=2. */
21712 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
21713 && (!type || !TYPE_USER_ALIGN (type))
21714 && (!decl || !DECL_USER_ALIGN (decl)))
21715 return 32;
21716
21717 return align;
21718 }
21719 \f
21720 /* Find a location for the static chain incoming to a nested function.
21721 This is a register, unless all free registers are used by arguments. */
21722
21723 static rtx
21724 ix86_static_chain (const_tree fndecl, bool incoming_p)
21725 {
21726 unsigned regno;
21727
21728 if (!DECL_STATIC_CHAIN (fndecl))
21729 return NULL;
21730
21731 if (TARGET_64BIT)
21732 {
21733 /* We always use R10 in 64-bit mode. */
21734 regno = R10_REG;
21735 }
21736 else
21737 {
21738 tree fntype;
21739 /* By default in 32-bit mode we use ECX to pass the static chain. */
21740 regno = CX_REG;
21741
21742 fntype = TREE_TYPE (fndecl);
21743 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
21744 {
21745 /* Fastcall functions use ecx/edx for arguments, which leaves
21746 us with EAX for the static chain. */
21747 regno = AX_REG;
21748 }
21749 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
21750 {
21751 /* Thiscall functions use ecx for arguments, which leaves
21752 us with EAX for the static chain. */
21753 regno = AX_REG;
21754 }
21755 else if (ix86_function_regparm (fntype, fndecl) == 3)
21756 {
21757 /* For regparm 3, we have no free call-clobbered registers in
21758 which to store the static chain. In order to implement this,
21759 we have the trampoline push the static chain to the stack.
21760 However, we can't push a value below the return address when
21761 we call the nested function directly, so we have to use an
21762 alternate entry point. For this we use ESI, and have the
21763 alternate entry point push ESI, so that things appear the
21764 same once we're executing the nested function. */
21765 if (incoming_p)
21766 {
21767 if (fndecl == current_function_decl)
21768 ix86_static_chain_on_stack = true;
21769 return gen_frame_mem (SImode,
21770 plus_constant (arg_pointer_rtx, -8));
21771 }
21772 regno = SI_REG;
21773 }
21774 }
21775
21776 return gen_rtx_REG (Pmode, regno);
21777 }
21778
21779 /* Emit RTL insns to initialize the variable parts of a trampoline.
21780 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21781 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21782 to be passed to the target function. */
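/* On ia32 the emitted trampoline is, for example,
      b9/b8/68 <chain32>   mov $chain, %ecx/%eax   or   push $chain
      e9 <rel32>           jmp target
   and in 64-bit mode
      49 bb <imm64> (or 41 bb <imm32>)   movabs/movl $target, %r11
      49 ba <imm64>                      movabs $chain, %r10
      49 ff e3 90                        jmp *%r11; nop (padding).  */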
21783
21784 static void
21785 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21786 {
21787 rtx mem, fnaddr;
21788
21789 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21790
21791 if (!TARGET_64BIT)
21792 {
21793 rtx disp, chain;
21794 int opcode;
21795
21796 /* Depending on the static chain location, either load a register
21797 with a constant, or push the constant to the stack. All of the
21798 instructions are the same size. */
21799 chain = ix86_static_chain (fndecl, true);
21800 if (REG_P (chain))
21801 {
21802 if (REGNO (chain) == CX_REG)
21803 opcode = 0xb9;
21804 else if (REGNO (chain) == AX_REG)
21805 opcode = 0xb8;
21806 else
21807 gcc_unreachable ();
21808 }
21809 else
21810 opcode = 0x68;
21811
21812 mem = adjust_address (m_tramp, QImode, 0);
21813 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21814
21815 mem = adjust_address (m_tramp, SImode, 1);
21816 emit_move_insn (mem, chain_value);
21817
21818 /* Compute the offset from the end of the jmp to the target function.
21819 When the trampoline stores the static chain on the stack, we need
21820 to skip the target's first insn, which pushes the (call-saved)
21821 static chain register; this push is 1 byte. */
21822 disp = expand_binop (SImode, sub_optab, fnaddr,
21823 plus_constant (XEXP (m_tramp, 0),
21824 MEM_P (chain) ? 9 : 10),
21825 NULL_RTX, 1, OPTAB_DIRECT);
21826
21827 mem = adjust_address (m_tramp, QImode, 5);
21828 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21829
21830 mem = adjust_address (m_tramp, SImode, 6);
21831 emit_move_insn (mem, disp);
21832 }
21833 else
21834 {
21835 int offset = 0;
21836
21837 /* Load the function address into r11. Try to load the address
21838 using the shorter movl instead of movabs. We may want to support
21839 movq for kernel mode, but the kernel does not use trampolines at
21840 the moment. */
21841 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21842 {
21843 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21844
21845 mem = adjust_address (m_tramp, HImode, offset);
21846 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21847
21848 mem = adjust_address (m_tramp, SImode, offset + 2);
21849 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21850 offset += 6;
21851 }
21852 else
21853 {
21854 mem = adjust_address (m_tramp, HImode, offset);
21855 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21856
21857 mem = adjust_address (m_tramp, DImode, offset + 2);
21858 emit_move_insn (mem, fnaddr);
21859 offset += 10;
21860 }
21861
21862 /* Load static chain using movabs to r10. */
21863 mem = adjust_address (m_tramp, HImode, offset);
21864 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21865
21866 mem = adjust_address (m_tramp, DImode, offset + 2);
21867 emit_move_insn (mem, chain_value);
21868 offset += 10;
21869
21870 /* Jump to r11; the last (unused) byte is a nop, only there to
21871 pad the write out to a single 32-bit store. */
21872 mem = adjust_address (m_tramp, SImode, offset);
21873 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
21874 offset += 4;
21875
21876 gcc_assert (offset <= TRAMPOLINE_SIZE);
21877 }
21878
21879 #ifdef ENABLE_EXECUTE_STACK
21880 #ifdef CHECK_EXECUTE_STACK_ENABLED
21881 if (CHECK_EXECUTE_STACK_ENABLED)
21882 #endif
21883 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21884 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
21885 #endif
21886 }
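/* For reference (a sketch derived from the code above, not an
   authoritative encoding listing), the trampolines built here are:

     32-bit, chain in a register:
	b9/b8 <chain32>		movl $chain, %ecx / %eax
	e9 <disp32>		jmp  <target>
     32-bit, regparm(3), chain on the stack:
	68 <chain32>		pushl $chain
	e9 <disp32>		jmp  <target + 1>
     64-bit:
	49 bb <addr64>		movabs $target, %r11  (or 41 bb <addr32>)
	49 ba <chain64>		movabs $chain, %r10
	49 ff e3 90		jmp *%r11; nop

   where <disp32> is relative to the end of the jmp instruction.  */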
21887 \f
21888 /* The following file contains several enumerations and data structures
21889 built from the definitions in i386-builtin-types.def. */
21890
21891 #include "i386-builtin-types.inc"
21892
21893 /* Table for the ix86 builtin non-function types. */
21894 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21895
21896 /* Retrieve an element from the above table, building some of
21897 the types lazily. */
21898
21899 static tree
21900 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21901 {
21902 unsigned int index;
21903 tree type, itype;
21904
21905 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
21906
21907 type = ix86_builtin_type_tab[(int) tcode];
21908 if (type != NULL)
21909 return type;
21910
21911 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21912 if (tcode <= IX86_BT_LAST_VECT)
21913 {
21914 enum machine_mode mode;
21915
21916 index = tcode - IX86_BT_LAST_PRIM - 1;
21917 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21918 mode = ix86_builtin_type_vect_mode[index];
21919
21920 type = build_vector_type_for_mode (itype, mode);
21921 }
21922 else
21923 {
21924 int quals;
21925
21926 index = tcode - IX86_BT_LAST_VECT - 1;
21927 if (tcode <= IX86_BT_LAST_PTR)
21928 quals = TYPE_UNQUALIFIED;
21929 else
21930 quals = TYPE_QUAL_CONST;
21931
21932 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21933 if (quals != TYPE_UNQUALIFIED)
21934 itype = build_qualified_type (itype, quals);
21935
21936 type = build_pointer_type (itype);
21937 }
21938
21939 ix86_builtin_type_tab[(int) tcode] = type;
21940 return type;
21941 }
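/* Illustrative note (added commentary): a vector code such as the one
   generated for "V4SF" in i386-builtin-types.def is built by looking up
   its element type recursively and calling build_vector_type_for_mode
   with the corresponding mode (V4SFmode here); pointer codes are built
   from their base type in the same way, with TYPE_QUAL_CONST added for
   the pointer-to-const variants.  */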
21942
21943 /* Table for the ix86 builtin function types. */
21944 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21945
21946 /* Retrieve an element from the above table, building some of
21947 the types lazily. */
21948
21949 static tree
21950 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21951 {
21952 tree type;
21953
21954 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
21955
21956 type = ix86_builtin_func_type_tab[(int) tcode];
21957 if (type != NULL)
21958 return type;
21959
21960 if (tcode <= IX86_BT_LAST_FUNC)
21961 {
21962 unsigned start = ix86_builtin_func_start[(int) tcode];
21963 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21964 tree rtype, atype, args = void_list_node;
21965 unsigned i;
21966
21967 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
21968 for (i = after - 1; i > start; --i)
21969 {
21970 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
21971 args = tree_cons (NULL, atype, args);
21972 }
21973
21974 type = build_function_type (rtype, args);
21975 }
21976 else
21977 {
21978 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
21979 enum ix86_builtin_func_type icode;
21980
21981 icode = ix86_builtin_func_alias_base[index];
21982 type = ix86_get_builtin_func_type (icode);
21983 }
21984
21985 ix86_builtin_func_type_tab[(int) tcode] = type;
21986 return type;
21987 }
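/* Illustrative note (added commentary): ix86_builtin_func_args stores each
   signature as a flat run of type codes with the return type first, so the
   loop above walks it backwards and tree_cons rebuilds the argument list in
   source order before build_function_type is called.  Alias codes beyond
   IX86_BT_LAST_FUNC simply share the type of the signature they alias.  */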
21988
21989
21990 /* Codes for all the SSE/MMX builtins. */
21991 enum ix86_builtins
21992 {
21993 IX86_BUILTIN_ADDPS,
21994 IX86_BUILTIN_ADDSS,
21995 IX86_BUILTIN_DIVPS,
21996 IX86_BUILTIN_DIVSS,
21997 IX86_BUILTIN_MULPS,
21998 IX86_BUILTIN_MULSS,
21999 IX86_BUILTIN_SUBPS,
22000 IX86_BUILTIN_SUBSS,
22001
22002 IX86_BUILTIN_CMPEQPS,
22003 IX86_BUILTIN_CMPLTPS,
22004 IX86_BUILTIN_CMPLEPS,
22005 IX86_BUILTIN_CMPGTPS,
22006 IX86_BUILTIN_CMPGEPS,
22007 IX86_BUILTIN_CMPNEQPS,
22008 IX86_BUILTIN_CMPNLTPS,
22009 IX86_BUILTIN_CMPNLEPS,
22010 IX86_BUILTIN_CMPNGTPS,
22011 IX86_BUILTIN_CMPNGEPS,
22012 IX86_BUILTIN_CMPORDPS,
22013 IX86_BUILTIN_CMPUNORDPS,
22014 IX86_BUILTIN_CMPEQSS,
22015 IX86_BUILTIN_CMPLTSS,
22016 IX86_BUILTIN_CMPLESS,
22017 IX86_BUILTIN_CMPNEQSS,
22018 IX86_BUILTIN_CMPNLTSS,
22019 IX86_BUILTIN_CMPNLESS,
22020 IX86_BUILTIN_CMPNGTSS,
22021 IX86_BUILTIN_CMPNGESS,
22022 IX86_BUILTIN_CMPORDSS,
22023 IX86_BUILTIN_CMPUNORDSS,
22024
22025 IX86_BUILTIN_COMIEQSS,
22026 IX86_BUILTIN_COMILTSS,
22027 IX86_BUILTIN_COMILESS,
22028 IX86_BUILTIN_COMIGTSS,
22029 IX86_BUILTIN_COMIGESS,
22030 IX86_BUILTIN_COMINEQSS,
22031 IX86_BUILTIN_UCOMIEQSS,
22032 IX86_BUILTIN_UCOMILTSS,
22033 IX86_BUILTIN_UCOMILESS,
22034 IX86_BUILTIN_UCOMIGTSS,
22035 IX86_BUILTIN_UCOMIGESS,
22036 IX86_BUILTIN_UCOMINEQSS,
22037
22038 IX86_BUILTIN_CVTPI2PS,
22039 IX86_BUILTIN_CVTPS2PI,
22040 IX86_BUILTIN_CVTSI2SS,
22041 IX86_BUILTIN_CVTSI642SS,
22042 IX86_BUILTIN_CVTSS2SI,
22043 IX86_BUILTIN_CVTSS2SI64,
22044 IX86_BUILTIN_CVTTPS2PI,
22045 IX86_BUILTIN_CVTTSS2SI,
22046 IX86_BUILTIN_CVTTSS2SI64,
22047
22048 IX86_BUILTIN_MAXPS,
22049 IX86_BUILTIN_MAXSS,
22050 IX86_BUILTIN_MINPS,
22051 IX86_BUILTIN_MINSS,
22052
22053 IX86_BUILTIN_LOADUPS,
22054 IX86_BUILTIN_STOREUPS,
22055 IX86_BUILTIN_MOVSS,
22056
22057 IX86_BUILTIN_MOVHLPS,
22058 IX86_BUILTIN_MOVLHPS,
22059 IX86_BUILTIN_LOADHPS,
22060 IX86_BUILTIN_LOADLPS,
22061 IX86_BUILTIN_STOREHPS,
22062 IX86_BUILTIN_STORELPS,
22063
22064 IX86_BUILTIN_MASKMOVQ,
22065 IX86_BUILTIN_MOVMSKPS,
22066 IX86_BUILTIN_PMOVMSKB,
22067
22068 IX86_BUILTIN_MOVNTPS,
22069 IX86_BUILTIN_MOVNTQ,
22070
22071 IX86_BUILTIN_LOADDQU,
22072 IX86_BUILTIN_STOREDQU,
22073
22074 IX86_BUILTIN_PACKSSWB,
22075 IX86_BUILTIN_PACKSSDW,
22076 IX86_BUILTIN_PACKUSWB,
22077
22078 IX86_BUILTIN_PADDB,
22079 IX86_BUILTIN_PADDW,
22080 IX86_BUILTIN_PADDD,
22081 IX86_BUILTIN_PADDQ,
22082 IX86_BUILTIN_PADDSB,
22083 IX86_BUILTIN_PADDSW,
22084 IX86_BUILTIN_PADDUSB,
22085 IX86_BUILTIN_PADDUSW,
22086 IX86_BUILTIN_PSUBB,
22087 IX86_BUILTIN_PSUBW,
22088 IX86_BUILTIN_PSUBD,
22089 IX86_BUILTIN_PSUBQ,
22090 IX86_BUILTIN_PSUBSB,
22091 IX86_BUILTIN_PSUBSW,
22092 IX86_BUILTIN_PSUBUSB,
22093 IX86_BUILTIN_PSUBUSW,
22094
22095 IX86_BUILTIN_PAND,
22096 IX86_BUILTIN_PANDN,
22097 IX86_BUILTIN_POR,
22098 IX86_BUILTIN_PXOR,
22099
22100 IX86_BUILTIN_PAVGB,
22101 IX86_BUILTIN_PAVGW,
22102
22103 IX86_BUILTIN_PCMPEQB,
22104 IX86_BUILTIN_PCMPEQW,
22105 IX86_BUILTIN_PCMPEQD,
22106 IX86_BUILTIN_PCMPGTB,
22107 IX86_BUILTIN_PCMPGTW,
22108 IX86_BUILTIN_PCMPGTD,
22109
22110 IX86_BUILTIN_PMADDWD,
22111
22112 IX86_BUILTIN_PMAXSW,
22113 IX86_BUILTIN_PMAXUB,
22114 IX86_BUILTIN_PMINSW,
22115 IX86_BUILTIN_PMINUB,
22116
22117 IX86_BUILTIN_PMULHUW,
22118 IX86_BUILTIN_PMULHW,
22119 IX86_BUILTIN_PMULLW,
22120
22121 IX86_BUILTIN_PSADBW,
22122 IX86_BUILTIN_PSHUFW,
22123
22124 IX86_BUILTIN_PSLLW,
22125 IX86_BUILTIN_PSLLD,
22126 IX86_BUILTIN_PSLLQ,
22127 IX86_BUILTIN_PSRAW,
22128 IX86_BUILTIN_PSRAD,
22129 IX86_BUILTIN_PSRLW,
22130 IX86_BUILTIN_PSRLD,
22131 IX86_BUILTIN_PSRLQ,
22132 IX86_BUILTIN_PSLLWI,
22133 IX86_BUILTIN_PSLLDI,
22134 IX86_BUILTIN_PSLLQI,
22135 IX86_BUILTIN_PSRAWI,
22136 IX86_BUILTIN_PSRADI,
22137 IX86_BUILTIN_PSRLWI,
22138 IX86_BUILTIN_PSRLDI,
22139 IX86_BUILTIN_PSRLQI,
22140
22141 IX86_BUILTIN_PUNPCKHBW,
22142 IX86_BUILTIN_PUNPCKHWD,
22143 IX86_BUILTIN_PUNPCKHDQ,
22144 IX86_BUILTIN_PUNPCKLBW,
22145 IX86_BUILTIN_PUNPCKLWD,
22146 IX86_BUILTIN_PUNPCKLDQ,
22147
22148 IX86_BUILTIN_SHUFPS,
22149
22150 IX86_BUILTIN_RCPPS,
22151 IX86_BUILTIN_RCPSS,
22152 IX86_BUILTIN_RSQRTPS,
22153 IX86_BUILTIN_RSQRTPS_NR,
22154 IX86_BUILTIN_RSQRTSS,
22155 IX86_BUILTIN_RSQRTF,
22156 IX86_BUILTIN_SQRTPS,
22157 IX86_BUILTIN_SQRTPS_NR,
22158 IX86_BUILTIN_SQRTSS,
22159
22160 IX86_BUILTIN_UNPCKHPS,
22161 IX86_BUILTIN_UNPCKLPS,
22162
22163 IX86_BUILTIN_ANDPS,
22164 IX86_BUILTIN_ANDNPS,
22165 IX86_BUILTIN_ORPS,
22166 IX86_BUILTIN_XORPS,
22167
22168 IX86_BUILTIN_EMMS,
22169 IX86_BUILTIN_LDMXCSR,
22170 IX86_BUILTIN_STMXCSR,
22171 IX86_BUILTIN_SFENCE,
22172
22173 /* 3DNow! Original */
22174 IX86_BUILTIN_FEMMS,
22175 IX86_BUILTIN_PAVGUSB,
22176 IX86_BUILTIN_PF2ID,
22177 IX86_BUILTIN_PFACC,
22178 IX86_BUILTIN_PFADD,
22179 IX86_BUILTIN_PFCMPEQ,
22180 IX86_BUILTIN_PFCMPGE,
22181 IX86_BUILTIN_PFCMPGT,
22182 IX86_BUILTIN_PFMAX,
22183 IX86_BUILTIN_PFMIN,
22184 IX86_BUILTIN_PFMUL,
22185 IX86_BUILTIN_PFRCP,
22186 IX86_BUILTIN_PFRCPIT1,
22187 IX86_BUILTIN_PFRCPIT2,
22188 IX86_BUILTIN_PFRSQIT1,
22189 IX86_BUILTIN_PFRSQRT,
22190 IX86_BUILTIN_PFSUB,
22191 IX86_BUILTIN_PFSUBR,
22192 IX86_BUILTIN_PI2FD,
22193 IX86_BUILTIN_PMULHRW,
22194
22195 /* 3DNow! Athlon Extensions */
22196 IX86_BUILTIN_PF2IW,
22197 IX86_BUILTIN_PFNACC,
22198 IX86_BUILTIN_PFPNACC,
22199 IX86_BUILTIN_PI2FW,
22200 IX86_BUILTIN_PSWAPDSI,
22201 IX86_BUILTIN_PSWAPDSF,
22202
22203 /* SSE2 */
22204 IX86_BUILTIN_ADDPD,
22205 IX86_BUILTIN_ADDSD,
22206 IX86_BUILTIN_DIVPD,
22207 IX86_BUILTIN_DIVSD,
22208 IX86_BUILTIN_MULPD,
22209 IX86_BUILTIN_MULSD,
22210 IX86_BUILTIN_SUBPD,
22211 IX86_BUILTIN_SUBSD,
22212
22213 IX86_BUILTIN_CMPEQPD,
22214 IX86_BUILTIN_CMPLTPD,
22215 IX86_BUILTIN_CMPLEPD,
22216 IX86_BUILTIN_CMPGTPD,
22217 IX86_BUILTIN_CMPGEPD,
22218 IX86_BUILTIN_CMPNEQPD,
22219 IX86_BUILTIN_CMPNLTPD,
22220 IX86_BUILTIN_CMPNLEPD,
22221 IX86_BUILTIN_CMPNGTPD,
22222 IX86_BUILTIN_CMPNGEPD,
22223 IX86_BUILTIN_CMPORDPD,
22224 IX86_BUILTIN_CMPUNORDPD,
22225 IX86_BUILTIN_CMPEQSD,
22226 IX86_BUILTIN_CMPLTSD,
22227 IX86_BUILTIN_CMPLESD,
22228 IX86_BUILTIN_CMPNEQSD,
22229 IX86_BUILTIN_CMPNLTSD,
22230 IX86_BUILTIN_CMPNLESD,
22231 IX86_BUILTIN_CMPORDSD,
22232 IX86_BUILTIN_CMPUNORDSD,
22233
22234 IX86_BUILTIN_COMIEQSD,
22235 IX86_BUILTIN_COMILTSD,
22236 IX86_BUILTIN_COMILESD,
22237 IX86_BUILTIN_COMIGTSD,
22238 IX86_BUILTIN_COMIGESD,
22239 IX86_BUILTIN_COMINEQSD,
22240 IX86_BUILTIN_UCOMIEQSD,
22241 IX86_BUILTIN_UCOMILTSD,
22242 IX86_BUILTIN_UCOMILESD,
22243 IX86_BUILTIN_UCOMIGTSD,
22244 IX86_BUILTIN_UCOMIGESD,
22245 IX86_BUILTIN_UCOMINEQSD,
22246
22247 IX86_BUILTIN_MAXPD,
22248 IX86_BUILTIN_MAXSD,
22249 IX86_BUILTIN_MINPD,
22250 IX86_BUILTIN_MINSD,
22251
22252 IX86_BUILTIN_ANDPD,
22253 IX86_BUILTIN_ANDNPD,
22254 IX86_BUILTIN_ORPD,
22255 IX86_BUILTIN_XORPD,
22256
22257 IX86_BUILTIN_SQRTPD,
22258 IX86_BUILTIN_SQRTSD,
22259
22260 IX86_BUILTIN_UNPCKHPD,
22261 IX86_BUILTIN_UNPCKLPD,
22262
22263 IX86_BUILTIN_SHUFPD,
22264
22265 IX86_BUILTIN_LOADUPD,
22266 IX86_BUILTIN_STOREUPD,
22267 IX86_BUILTIN_MOVSD,
22268
22269 IX86_BUILTIN_LOADHPD,
22270 IX86_BUILTIN_LOADLPD,
22271
22272 IX86_BUILTIN_CVTDQ2PD,
22273 IX86_BUILTIN_CVTDQ2PS,
22274
22275 IX86_BUILTIN_CVTPD2DQ,
22276 IX86_BUILTIN_CVTPD2PI,
22277 IX86_BUILTIN_CVTPD2PS,
22278 IX86_BUILTIN_CVTTPD2DQ,
22279 IX86_BUILTIN_CVTTPD2PI,
22280
22281 IX86_BUILTIN_CVTPI2PD,
22282 IX86_BUILTIN_CVTSI2SD,
22283 IX86_BUILTIN_CVTSI642SD,
22284
22285 IX86_BUILTIN_CVTSD2SI,
22286 IX86_BUILTIN_CVTSD2SI64,
22287 IX86_BUILTIN_CVTSD2SS,
22288 IX86_BUILTIN_CVTSS2SD,
22289 IX86_BUILTIN_CVTTSD2SI,
22290 IX86_BUILTIN_CVTTSD2SI64,
22291
22292 IX86_BUILTIN_CVTPS2DQ,
22293 IX86_BUILTIN_CVTPS2PD,
22294 IX86_BUILTIN_CVTTPS2DQ,
22295
22296 IX86_BUILTIN_MOVNTI,
22297 IX86_BUILTIN_MOVNTPD,
22298 IX86_BUILTIN_MOVNTDQ,
22299
22300 IX86_BUILTIN_MOVQ128,
22301
22302 /* SSE2 MMX */
22303 IX86_BUILTIN_MASKMOVDQU,
22304 IX86_BUILTIN_MOVMSKPD,
22305 IX86_BUILTIN_PMOVMSKB128,
22306
22307 IX86_BUILTIN_PACKSSWB128,
22308 IX86_BUILTIN_PACKSSDW128,
22309 IX86_BUILTIN_PACKUSWB128,
22310
22311 IX86_BUILTIN_PADDB128,
22312 IX86_BUILTIN_PADDW128,
22313 IX86_BUILTIN_PADDD128,
22314 IX86_BUILTIN_PADDQ128,
22315 IX86_BUILTIN_PADDSB128,
22316 IX86_BUILTIN_PADDSW128,
22317 IX86_BUILTIN_PADDUSB128,
22318 IX86_BUILTIN_PADDUSW128,
22319 IX86_BUILTIN_PSUBB128,
22320 IX86_BUILTIN_PSUBW128,
22321 IX86_BUILTIN_PSUBD128,
22322 IX86_BUILTIN_PSUBQ128,
22323 IX86_BUILTIN_PSUBSB128,
22324 IX86_BUILTIN_PSUBSW128,
22325 IX86_BUILTIN_PSUBUSB128,
22326 IX86_BUILTIN_PSUBUSW128,
22327
22328 IX86_BUILTIN_PAND128,
22329 IX86_BUILTIN_PANDN128,
22330 IX86_BUILTIN_POR128,
22331 IX86_BUILTIN_PXOR128,
22332
22333 IX86_BUILTIN_PAVGB128,
22334 IX86_BUILTIN_PAVGW128,
22335
22336 IX86_BUILTIN_PCMPEQB128,
22337 IX86_BUILTIN_PCMPEQW128,
22338 IX86_BUILTIN_PCMPEQD128,
22339 IX86_BUILTIN_PCMPGTB128,
22340 IX86_BUILTIN_PCMPGTW128,
22341 IX86_BUILTIN_PCMPGTD128,
22342
22343 IX86_BUILTIN_PMADDWD128,
22344
22345 IX86_BUILTIN_PMAXSW128,
22346 IX86_BUILTIN_PMAXUB128,
22347 IX86_BUILTIN_PMINSW128,
22348 IX86_BUILTIN_PMINUB128,
22349
22350 IX86_BUILTIN_PMULUDQ,
22351 IX86_BUILTIN_PMULUDQ128,
22352 IX86_BUILTIN_PMULHUW128,
22353 IX86_BUILTIN_PMULHW128,
22354 IX86_BUILTIN_PMULLW128,
22355
22356 IX86_BUILTIN_PSADBW128,
22357 IX86_BUILTIN_PSHUFHW,
22358 IX86_BUILTIN_PSHUFLW,
22359 IX86_BUILTIN_PSHUFD,
22360
22361 IX86_BUILTIN_PSLLDQI128,
22362 IX86_BUILTIN_PSLLWI128,
22363 IX86_BUILTIN_PSLLDI128,
22364 IX86_BUILTIN_PSLLQI128,
22365 IX86_BUILTIN_PSRAWI128,
22366 IX86_BUILTIN_PSRADI128,
22367 IX86_BUILTIN_PSRLDQI128,
22368 IX86_BUILTIN_PSRLWI128,
22369 IX86_BUILTIN_PSRLDI128,
22370 IX86_BUILTIN_PSRLQI128,
22371
22372 IX86_BUILTIN_PSLLDQ128,
22373 IX86_BUILTIN_PSLLW128,
22374 IX86_BUILTIN_PSLLD128,
22375 IX86_BUILTIN_PSLLQ128,
22376 IX86_BUILTIN_PSRAW128,
22377 IX86_BUILTIN_PSRAD128,
22378 IX86_BUILTIN_PSRLW128,
22379 IX86_BUILTIN_PSRLD128,
22380 IX86_BUILTIN_PSRLQ128,
22381
22382 IX86_BUILTIN_PUNPCKHBW128,
22383 IX86_BUILTIN_PUNPCKHWD128,
22384 IX86_BUILTIN_PUNPCKHDQ128,
22385 IX86_BUILTIN_PUNPCKHQDQ128,
22386 IX86_BUILTIN_PUNPCKLBW128,
22387 IX86_BUILTIN_PUNPCKLWD128,
22388 IX86_BUILTIN_PUNPCKLDQ128,
22389 IX86_BUILTIN_PUNPCKLQDQ128,
22390
22391 IX86_BUILTIN_CLFLUSH,
22392 IX86_BUILTIN_MFENCE,
22393 IX86_BUILTIN_LFENCE,
22394
22395 IX86_BUILTIN_BSRSI,
22396 IX86_BUILTIN_BSRDI,
22397 IX86_BUILTIN_RDPMC,
22398 IX86_BUILTIN_RDTSC,
22399 IX86_BUILTIN_RDTSCP,
22400 IX86_BUILTIN_ROLQI,
22401 IX86_BUILTIN_ROLHI,
22402 IX86_BUILTIN_RORQI,
22403 IX86_BUILTIN_RORHI,
22404
22405 /* SSE3. */
22406 IX86_BUILTIN_ADDSUBPS,
22407 IX86_BUILTIN_HADDPS,
22408 IX86_BUILTIN_HSUBPS,
22409 IX86_BUILTIN_MOVSHDUP,
22410 IX86_BUILTIN_MOVSLDUP,
22411 IX86_BUILTIN_ADDSUBPD,
22412 IX86_BUILTIN_HADDPD,
22413 IX86_BUILTIN_HSUBPD,
22414 IX86_BUILTIN_LDDQU,
22415
22416 IX86_BUILTIN_MONITOR,
22417 IX86_BUILTIN_MWAIT,
22418
22419 /* SSSE3. */
22420 IX86_BUILTIN_PHADDW,
22421 IX86_BUILTIN_PHADDD,
22422 IX86_BUILTIN_PHADDSW,
22423 IX86_BUILTIN_PHSUBW,
22424 IX86_BUILTIN_PHSUBD,
22425 IX86_BUILTIN_PHSUBSW,
22426 IX86_BUILTIN_PMADDUBSW,
22427 IX86_BUILTIN_PMULHRSW,
22428 IX86_BUILTIN_PSHUFB,
22429 IX86_BUILTIN_PSIGNB,
22430 IX86_BUILTIN_PSIGNW,
22431 IX86_BUILTIN_PSIGND,
22432 IX86_BUILTIN_PALIGNR,
22433 IX86_BUILTIN_PABSB,
22434 IX86_BUILTIN_PABSW,
22435 IX86_BUILTIN_PABSD,
22436
22437 IX86_BUILTIN_PHADDW128,
22438 IX86_BUILTIN_PHADDD128,
22439 IX86_BUILTIN_PHADDSW128,
22440 IX86_BUILTIN_PHSUBW128,
22441 IX86_BUILTIN_PHSUBD128,
22442 IX86_BUILTIN_PHSUBSW128,
22443 IX86_BUILTIN_PMADDUBSW128,
22444 IX86_BUILTIN_PMULHRSW128,
22445 IX86_BUILTIN_PSHUFB128,
22446 IX86_BUILTIN_PSIGNB128,
22447 IX86_BUILTIN_PSIGNW128,
22448 IX86_BUILTIN_PSIGND128,
22449 IX86_BUILTIN_PALIGNR128,
22450 IX86_BUILTIN_PABSB128,
22451 IX86_BUILTIN_PABSW128,
22452 IX86_BUILTIN_PABSD128,
22453
22454 /* AMDFAM10 - SSE4A New Instructions. */
22455 IX86_BUILTIN_MOVNTSD,
22456 IX86_BUILTIN_MOVNTSS,
22457 IX86_BUILTIN_EXTRQI,
22458 IX86_BUILTIN_EXTRQ,
22459 IX86_BUILTIN_INSERTQI,
22460 IX86_BUILTIN_INSERTQ,
22461
22462 /* SSE4.1. */
22463 IX86_BUILTIN_BLENDPD,
22464 IX86_BUILTIN_BLENDPS,
22465 IX86_BUILTIN_BLENDVPD,
22466 IX86_BUILTIN_BLENDVPS,
22467 IX86_BUILTIN_PBLENDVB128,
22468 IX86_BUILTIN_PBLENDW128,
22469
22470 IX86_BUILTIN_DPPD,
22471 IX86_BUILTIN_DPPS,
22472
22473 IX86_BUILTIN_INSERTPS128,
22474
22475 IX86_BUILTIN_MOVNTDQA,
22476 IX86_BUILTIN_MPSADBW128,
22477 IX86_BUILTIN_PACKUSDW128,
22478 IX86_BUILTIN_PCMPEQQ,
22479 IX86_BUILTIN_PHMINPOSUW128,
22480
22481 IX86_BUILTIN_PMAXSB128,
22482 IX86_BUILTIN_PMAXSD128,
22483 IX86_BUILTIN_PMAXUD128,
22484 IX86_BUILTIN_PMAXUW128,
22485
22486 IX86_BUILTIN_PMINSB128,
22487 IX86_BUILTIN_PMINSD128,
22488 IX86_BUILTIN_PMINUD128,
22489 IX86_BUILTIN_PMINUW128,
22490
22491 IX86_BUILTIN_PMOVSXBW128,
22492 IX86_BUILTIN_PMOVSXBD128,
22493 IX86_BUILTIN_PMOVSXBQ128,
22494 IX86_BUILTIN_PMOVSXWD128,
22495 IX86_BUILTIN_PMOVSXWQ128,
22496 IX86_BUILTIN_PMOVSXDQ128,
22497
22498 IX86_BUILTIN_PMOVZXBW128,
22499 IX86_BUILTIN_PMOVZXBD128,
22500 IX86_BUILTIN_PMOVZXBQ128,
22501 IX86_BUILTIN_PMOVZXWD128,
22502 IX86_BUILTIN_PMOVZXWQ128,
22503 IX86_BUILTIN_PMOVZXDQ128,
22504
22505 IX86_BUILTIN_PMULDQ128,
22506 IX86_BUILTIN_PMULLD128,
22507
22508 IX86_BUILTIN_ROUNDPD,
22509 IX86_BUILTIN_ROUNDPS,
22510 IX86_BUILTIN_ROUNDSD,
22511 IX86_BUILTIN_ROUNDSS,
22512
22513 IX86_BUILTIN_PTESTZ,
22514 IX86_BUILTIN_PTESTC,
22515 IX86_BUILTIN_PTESTNZC,
22516
22517 IX86_BUILTIN_VEC_INIT_V2SI,
22518 IX86_BUILTIN_VEC_INIT_V4HI,
22519 IX86_BUILTIN_VEC_INIT_V8QI,
22520 IX86_BUILTIN_VEC_EXT_V2DF,
22521 IX86_BUILTIN_VEC_EXT_V2DI,
22522 IX86_BUILTIN_VEC_EXT_V4SF,
22523 IX86_BUILTIN_VEC_EXT_V4SI,
22524 IX86_BUILTIN_VEC_EXT_V8HI,
22525 IX86_BUILTIN_VEC_EXT_V2SI,
22526 IX86_BUILTIN_VEC_EXT_V4HI,
22527 IX86_BUILTIN_VEC_EXT_V16QI,
22528 IX86_BUILTIN_VEC_SET_V2DI,
22529 IX86_BUILTIN_VEC_SET_V4SF,
22530 IX86_BUILTIN_VEC_SET_V4SI,
22531 IX86_BUILTIN_VEC_SET_V8HI,
22532 IX86_BUILTIN_VEC_SET_V4HI,
22533 IX86_BUILTIN_VEC_SET_V16QI,
22534
22535 IX86_BUILTIN_VEC_PACK_SFIX,
22536
22537 /* SSE4.2. */
22538 IX86_BUILTIN_CRC32QI,
22539 IX86_BUILTIN_CRC32HI,
22540 IX86_BUILTIN_CRC32SI,
22541 IX86_BUILTIN_CRC32DI,
22542
22543 IX86_BUILTIN_PCMPESTRI128,
22544 IX86_BUILTIN_PCMPESTRM128,
22545 IX86_BUILTIN_PCMPESTRA128,
22546 IX86_BUILTIN_PCMPESTRC128,
22547 IX86_BUILTIN_PCMPESTRO128,
22548 IX86_BUILTIN_PCMPESTRS128,
22549 IX86_BUILTIN_PCMPESTRZ128,
22550 IX86_BUILTIN_PCMPISTRI128,
22551 IX86_BUILTIN_PCMPISTRM128,
22552 IX86_BUILTIN_PCMPISTRA128,
22553 IX86_BUILTIN_PCMPISTRC128,
22554 IX86_BUILTIN_PCMPISTRO128,
22555 IX86_BUILTIN_PCMPISTRS128,
22556 IX86_BUILTIN_PCMPISTRZ128,
22557
22558 IX86_BUILTIN_PCMPGTQ,
22559
22560 /* AES instructions */
22561 IX86_BUILTIN_AESENC128,
22562 IX86_BUILTIN_AESENCLAST128,
22563 IX86_BUILTIN_AESDEC128,
22564 IX86_BUILTIN_AESDECLAST128,
22565 IX86_BUILTIN_AESIMC128,
22566 IX86_BUILTIN_AESKEYGENASSIST128,
22567
22568 /* PCLMUL instruction */
22569 IX86_BUILTIN_PCLMULQDQ128,
22570
22571 /* AVX */
22572 IX86_BUILTIN_ADDPD256,
22573 IX86_BUILTIN_ADDPS256,
22574 IX86_BUILTIN_ADDSUBPD256,
22575 IX86_BUILTIN_ADDSUBPS256,
22576 IX86_BUILTIN_ANDPD256,
22577 IX86_BUILTIN_ANDPS256,
22578 IX86_BUILTIN_ANDNPD256,
22579 IX86_BUILTIN_ANDNPS256,
22580 IX86_BUILTIN_BLENDPD256,
22581 IX86_BUILTIN_BLENDPS256,
22582 IX86_BUILTIN_BLENDVPD256,
22583 IX86_BUILTIN_BLENDVPS256,
22584 IX86_BUILTIN_DIVPD256,
22585 IX86_BUILTIN_DIVPS256,
22586 IX86_BUILTIN_DPPS256,
22587 IX86_BUILTIN_HADDPD256,
22588 IX86_BUILTIN_HADDPS256,
22589 IX86_BUILTIN_HSUBPD256,
22590 IX86_BUILTIN_HSUBPS256,
22591 IX86_BUILTIN_MAXPD256,
22592 IX86_BUILTIN_MAXPS256,
22593 IX86_BUILTIN_MINPD256,
22594 IX86_BUILTIN_MINPS256,
22595 IX86_BUILTIN_MULPD256,
22596 IX86_BUILTIN_MULPS256,
22597 IX86_BUILTIN_ORPD256,
22598 IX86_BUILTIN_ORPS256,
22599 IX86_BUILTIN_SHUFPD256,
22600 IX86_BUILTIN_SHUFPS256,
22601 IX86_BUILTIN_SUBPD256,
22602 IX86_BUILTIN_SUBPS256,
22603 IX86_BUILTIN_XORPD256,
22604 IX86_BUILTIN_XORPS256,
22605 IX86_BUILTIN_CMPSD,
22606 IX86_BUILTIN_CMPSS,
22607 IX86_BUILTIN_CMPPD,
22608 IX86_BUILTIN_CMPPS,
22609 IX86_BUILTIN_CMPPD256,
22610 IX86_BUILTIN_CMPPS256,
22611 IX86_BUILTIN_CVTDQ2PD256,
22612 IX86_BUILTIN_CVTDQ2PS256,
22613 IX86_BUILTIN_CVTPD2PS256,
22614 IX86_BUILTIN_CVTPS2DQ256,
22615 IX86_BUILTIN_CVTPS2PD256,
22616 IX86_BUILTIN_CVTTPD2DQ256,
22617 IX86_BUILTIN_CVTPD2DQ256,
22618 IX86_BUILTIN_CVTTPS2DQ256,
22619 IX86_BUILTIN_EXTRACTF128PD256,
22620 IX86_BUILTIN_EXTRACTF128PS256,
22621 IX86_BUILTIN_EXTRACTF128SI256,
22622 IX86_BUILTIN_VZEROALL,
22623 IX86_BUILTIN_VZEROUPPER,
22624 IX86_BUILTIN_VPERMILVARPD,
22625 IX86_BUILTIN_VPERMILVARPS,
22626 IX86_BUILTIN_VPERMILVARPD256,
22627 IX86_BUILTIN_VPERMILVARPS256,
22628 IX86_BUILTIN_VPERMILPD,
22629 IX86_BUILTIN_VPERMILPS,
22630 IX86_BUILTIN_VPERMILPD256,
22631 IX86_BUILTIN_VPERMILPS256,
22632 IX86_BUILTIN_VPERMIL2PD,
22633 IX86_BUILTIN_VPERMIL2PS,
22634 IX86_BUILTIN_VPERMIL2PD256,
22635 IX86_BUILTIN_VPERMIL2PS256,
22636 IX86_BUILTIN_VPERM2F128PD256,
22637 IX86_BUILTIN_VPERM2F128PS256,
22638 IX86_BUILTIN_VPERM2F128SI256,
22639 IX86_BUILTIN_VBROADCASTSS,
22640 IX86_BUILTIN_VBROADCASTSD256,
22641 IX86_BUILTIN_VBROADCASTSS256,
22642 IX86_BUILTIN_VBROADCASTPD256,
22643 IX86_BUILTIN_VBROADCASTPS256,
22644 IX86_BUILTIN_VINSERTF128PD256,
22645 IX86_BUILTIN_VINSERTF128PS256,
22646 IX86_BUILTIN_VINSERTF128SI256,
22647 IX86_BUILTIN_LOADUPD256,
22648 IX86_BUILTIN_LOADUPS256,
22649 IX86_BUILTIN_STOREUPD256,
22650 IX86_BUILTIN_STOREUPS256,
22651 IX86_BUILTIN_LDDQU256,
22652 IX86_BUILTIN_MOVNTDQ256,
22653 IX86_BUILTIN_MOVNTPD256,
22654 IX86_BUILTIN_MOVNTPS256,
22655 IX86_BUILTIN_LOADDQU256,
22656 IX86_BUILTIN_STOREDQU256,
22657 IX86_BUILTIN_MASKLOADPD,
22658 IX86_BUILTIN_MASKLOADPS,
22659 IX86_BUILTIN_MASKSTOREPD,
22660 IX86_BUILTIN_MASKSTOREPS,
22661 IX86_BUILTIN_MASKLOADPD256,
22662 IX86_BUILTIN_MASKLOADPS256,
22663 IX86_BUILTIN_MASKSTOREPD256,
22664 IX86_BUILTIN_MASKSTOREPS256,
22665 IX86_BUILTIN_MOVSHDUP256,
22666 IX86_BUILTIN_MOVSLDUP256,
22667 IX86_BUILTIN_MOVDDUP256,
22668
22669 IX86_BUILTIN_SQRTPD256,
22670 IX86_BUILTIN_SQRTPS256,
22671 IX86_BUILTIN_SQRTPS_NR256,
22672 IX86_BUILTIN_RSQRTPS256,
22673 IX86_BUILTIN_RSQRTPS_NR256,
22674
22675 IX86_BUILTIN_RCPPS256,
22676
22677 IX86_BUILTIN_ROUNDPD256,
22678 IX86_BUILTIN_ROUNDPS256,
22679
22680 IX86_BUILTIN_UNPCKHPD256,
22681 IX86_BUILTIN_UNPCKLPD256,
22682 IX86_BUILTIN_UNPCKHPS256,
22683 IX86_BUILTIN_UNPCKLPS256,
22684
22685 IX86_BUILTIN_SI256_SI,
22686 IX86_BUILTIN_PS256_PS,
22687 IX86_BUILTIN_PD256_PD,
22688 IX86_BUILTIN_SI_SI256,
22689 IX86_BUILTIN_PS_PS256,
22690 IX86_BUILTIN_PD_PD256,
22691
22692 IX86_BUILTIN_VTESTZPD,
22693 IX86_BUILTIN_VTESTCPD,
22694 IX86_BUILTIN_VTESTNZCPD,
22695 IX86_BUILTIN_VTESTZPS,
22696 IX86_BUILTIN_VTESTCPS,
22697 IX86_BUILTIN_VTESTNZCPS,
22698 IX86_BUILTIN_VTESTZPD256,
22699 IX86_BUILTIN_VTESTCPD256,
22700 IX86_BUILTIN_VTESTNZCPD256,
22701 IX86_BUILTIN_VTESTZPS256,
22702 IX86_BUILTIN_VTESTCPS256,
22703 IX86_BUILTIN_VTESTNZCPS256,
22704 IX86_BUILTIN_PTESTZ256,
22705 IX86_BUILTIN_PTESTC256,
22706 IX86_BUILTIN_PTESTNZC256,
22707
22708 IX86_BUILTIN_MOVMSKPD256,
22709 IX86_BUILTIN_MOVMSKPS256,
22710
22711 /* TFmode support builtins. */
22712 IX86_BUILTIN_INFQ,
22713 IX86_BUILTIN_HUGE_VALQ,
22714 IX86_BUILTIN_FABSQ,
22715 IX86_BUILTIN_COPYSIGNQ,
22716
22717 /* Vectorizer support builtins. */
22718 IX86_BUILTIN_CPYSGNPS,
22719 IX86_BUILTIN_CPYSGNPD,
22720
22721 IX86_BUILTIN_CVTUDQ2PS,
22722
22723 IX86_BUILTIN_VEC_PERM_V2DF,
22724 IX86_BUILTIN_VEC_PERM_V4SF,
22725 IX86_BUILTIN_VEC_PERM_V2DI,
22726 IX86_BUILTIN_VEC_PERM_V4SI,
22727 IX86_BUILTIN_VEC_PERM_V8HI,
22728 IX86_BUILTIN_VEC_PERM_V16QI,
22729 IX86_BUILTIN_VEC_PERM_V2DI_U,
22730 IX86_BUILTIN_VEC_PERM_V4SI_U,
22731 IX86_BUILTIN_VEC_PERM_V8HI_U,
22732 IX86_BUILTIN_VEC_PERM_V16QI_U,
22733 IX86_BUILTIN_VEC_PERM_V4DF,
22734 IX86_BUILTIN_VEC_PERM_V8SF,
22735
22736 /* FMA4 and XOP instructions. */
22737 IX86_BUILTIN_VFMADDSS,
22738 IX86_BUILTIN_VFMADDSD,
22739 IX86_BUILTIN_VFMADDPS,
22740 IX86_BUILTIN_VFMADDPD,
22741 IX86_BUILTIN_VFMSUBSS,
22742 IX86_BUILTIN_VFMSUBSD,
22743 IX86_BUILTIN_VFMSUBPS,
22744 IX86_BUILTIN_VFMSUBPD,
22745 IX86_BUILTIN_VFMADDSUBPS,
22746 IX86_BUILTIN_VFMADDSUBPD,
22747 IX86_BUILTIN_VFMSUBADDPS,
22748 IX86_BUILTIN_VFMSUBADDPD,
22749 IX86_BUILTIN_VFNMADDSS,
22750 IX86_BUILTIN_VFNMADDSD,
22751 IX86_BUILTIN_VFNMADDPS,
22752 IX86_BUILTIN_VFNMADDPD,
22753 IX86_BUILTIN_VFNMSUBSS,
22754 IX86_BUILTIN_VFNMSUBSD,
22755 IX86_BUILTIN_VFNMSUBPS,
22756 IX86_BUILTIN_VFNMSUBPD,
22757 IX86_BUILTIN_VFMADDPS256,
22758 IX86_BUILTIN_VFMADDPD256,
22759 IX86_BUILTIN_VFMSUBPS256,
22760 IX86_BUILTIN_VFMSUBPD256,
22761 IX86_BUILTIN_VFMADDSUBPS256,
22762 IX86_BUILTIN_VFMADDSUBPD256,
22763 IX86_BUILTIN_VFMSUBADDPS256,
22764 IX86_BUILTIN_VFMSUBADDPD256,
22765 IX86_BUILTIN_VFNMADDPS256,
22766 IX86_BUILTIN_VFNMADDPD256,
22767 IX86_BUILTIN_VFNMSUBPS256,
22768 IX86_BUILTIN_VFNMSUBPD256,
22769
22770 IX86_BUILTIN_VPCMOV,
22771 IX86_BUILTIN_VPCMOV_V2DI,
22772 IX86_BUILTIN_VPCMOV_V4SI,
22773 IX86_BUILTIN_VPCMOV_V8HI,
22774 IX86_BUILTIN_VPCMOV_V16QI,
22775 IX86_BUILTIN_VPCMOV_V4SF,
22776 IX86_BUILTIN_VPCMOV_V2DF,
22777 IX86_BUILTIN_VPCMOV256,
22778 IX86_BUILTIN_VPCMOV_V4DI256,
22779 IX86_BUILTIN_VPCMOV_V8SI256,
22780 IX86_BUILTIN_VPCMOV_V16HI256,
22781 IX86_BUILTIN_VPCMOV_V32QI256,
22782 IX86_BUILTIN_VPCMOV_V8SF256,
22783 IX86_BUILTIN_VPCMOV_V4DF256,
22784
22785 IX86_BUILTIN_VPPERM,
22786
22787 IX86_BUILTIN_VPMACSSWW,
22788 IX86_BUILTIN_VPMACSWW,
22789 IX86_BUILTIN_VPMACSSWD,
22790 IX86_BUILTIN_VPMACSWD,
22791 IX86_BUILTIN_VPMACSSDD,
22792 IX86_BUILTIN_VPMACSDD,
22793 IX86_BUILTIN_VPMACSSDQL,
22794 IX86_BUILTIN_VPMACSSDQH,
22795 IX86_BUILTIN_VPMACSDQL,
22796 IX86_BUILTIN_VPMACSDQH,
22797 IX86_BUILTIN_VPMADCSSWD,
22798 IX86_BUILTIN_VPMADCSWD,
22799
22800 IX86_BUILTIN_VPHADDBW,
22801 IX86_BUILTIN_VPHADDBD,
22802 IX86_BUILTIN_VPHADDBQ,
22803 IX86_BUILTIN_VPHADDWD,
22804 IX86_BUILTIN_VPHADDWQ,
22805 IX86_BUILTIN_VPHADDDQ,
22806 IX86_BUILTIN_VPHADDUBW,
22807 IX86_BUILTIN_VPHADDUBD,
22808 IX86_BUILTIN_VPHADDUBQ,
22809 IX86_BUILTIN_VPHADDUWD,
22810 IX86_BUILTIN_VPHADDUWQ,
22811 IX86_BUILTIN_VPHADDUDQ,
22812 IX86_BUILTIN_VPHSUBBW,
22813 IX86_BUILTIN_VPHSUBWD,
22814 IX86_BUILTIN_VPHSUBDQ,
22815
22816 IX86_BUILTIN_VPROTB,
22817 IX86_BUILTIN_VPROTW,
22818 IX86_BUILTIN_VPROTD,
22819 IX86_BUILTIN_VPROTQ,
22820 IX86_BUILTIN_VPROTB_IMM,
22821 IX86_BUILTIN_VPROTW_IMM,
22822 IX86_BUILTIN_VPROTD_IMM,
22823 IX86_BUILTIN_VPROTQ_IMM,
22824
22825 IX86_BUILTIN_VPSHLB,
22826 IX86_BUILTIN_VPSHLW,
22827 IX86_BUILTIN_VPSHLD,
22828 IX86_BUILTIN_VPSHLQ,
22829 IX86_BUILTIN_VPSHAB,
22830 IX86_BUILTIN_VPSHAW,
22831 IX86_BUILTIN_VPSHAD,
22832 IX86_BUILTIN_VPSHAQ,
22833
22834 IX86_BUILTIN_VFRCZSS,
22835 IX86_BUILTIN_VFRCZSD,
22836 IX86_BUILTIN_VFRCZPS,
22837 IX86_BUILTIN_VFRCZPD,
22838 IX86_BUILTIN_VFRCZPS256,
22839 IX86_BUILTIN_VFRCZPD256,
22840
22841 IX86_BUILTIN_VPCOMEQUB,
22842 IX86_BUILTIN_VPCOMNEUB,
22843 IX86_BUILTIN_VPCOMLTUB,
22844 IX86_BUILTIN_VPCOMLEUB,
22845 IX86_BUILTIN_VPCOMGTUB,
22846 IX86_BUILTIN_VPCOMGEUB,
22847 IX86_BUILTIN_VPCOMFALSEUB,
22848 IX86_BUILTIN_VPCOMTRUEUB,
22849
22850 IX86_BUILTIN_VPCOMEQUW,
22851 IX86_BUILTIN_VPCOMNEUW,
22852 IX86_BUILTIN_VPCOMLTUW,
22853 IX86_BUILTIN_VPCOMLEUW,
22854 IX86_BUILTIN_VPCOMGTUW,
22855 IX86_BUILTIN_VPCOMGEUW,
22856 IX86_BUILTIN_VPCOMFALSEUW,
22857 IX86_BUILTIN_VPCOMTRUEUW,
22858
22859 IX86_BUILTIN_VPCOMEQUD,
22860 IX86_BUILTIN_VPCOMNEUD,
22861 IX86_BUILTIN_VPCOMLTUD,
22862 IX86_BUILTIN_VPCOMLEUD,
22863 IX86_BUILTIN_VPCOMGTUD,
22864 IX86_BUILTIN_VPCOMGEUD,
22865 IX86_BUILTIN_VPCOMFALSEUD,
22866 IX86_BUILTIN_VPCOMTRUEUD,
22867
22868 IX86_BUILTIN_VPCOMEQUQ,
22869 IX86_BUILTIN_VPCOMNEUQ,
22870 IX86_BUILTIN_VPCOMLTUQ,
22871 IX86_BUILTIN_VPCOMLEUQ,
22872 IX86_BUILTIN_VPCOMGTUQ,
22873 IX86_BUILTIN_VPCOMGEUQ,
22874 IX86_BUILTIN_VPCOMFALSEUQ,
22875 IX86_BUILTIN_VPCOMTRUEUQ,
22876
22877 IX86_BUILTIN_VPCOMEQB,
22878 IX86_BUILTIN_VPCOMNEB,
22879 IX86_BUILTIN_VPCOMLTB,
22880 IX86_BUILTIN_VPCOMLEB,
22881 IX86_BUILTIN_VPCOMGTB,
22882 IX86_BUILTIN_VPCOMGEB,
22883 IX86_BUILTIN_VPCOMFALSEB,
22884 IX86_BUILTIN_VPCOMTRUEB,
22885
22886 IX86_BUILTIN_VPCOMEQW,
22887 IX86_BUILTIN_VPCOMNEW,
22888 IX86_BUILTIN_VPCOMLTW,
22889 IX86_BUILTIN_VPCOMLEW,
22890 IX86_BUILTIN_VPCOMGTW,
22891 IX86_BUILTIN_VPCOMGEW,
22892 IX86_BUILTIN_VPCOMFALSEW,
22893 IX86_BUILTIN_VPCOMTRUEW,
22894
22895 IX86_BUILTIN_VPCOMEQD,
22896 IX86_BUILTIN_VPCOMNED,
22897 IX86_BUILTIN_VPCOMLTD,
22898 IX86_BUILTIN_VPCOMLED,
22899 IX86_BUILTIN_VPCOMGTD,
22900 IX86_BUILTIN_VPCOMGED,
22901 IX86_BUILTIN_VPCOMFALSED,
22902 IX86_BUILTIN_VPCOMTRUED,
22903
22904 IX86_BUILTIN_VPCOMEQQ,
22905 IX86_BUILTIN_VPCOMNEQ,
22906 IX86_BUILTIN_VPCOMLTQ,
22907 IX86_BUILTIN_VPCOMLEQ,
22908 IX86_BUILTIN_VPCOMGTQ,
22909 IX86_BUILTIN_VPCOMGEQ,
22910 IX86_BUILTIN_VPCOMFALSEQ,
22911 IX86_BUILTIN_VPCOMTRUEQ,
22912
22913 /* LWP instructions. */
22914 IX86_BUILTIN_LLWPCB,
22915 IX86_BUILTIN_SLWPCB,
22916 IX86_BUILTIN_LWPVAL32,
22917 IX86_BUILTIN_LWPVAL64,
22918 IX86_BUILTIN_LWPINS32,
22919 IX86_BUILTIN_LWPINS64,
22920
22921 IX86_BUILTIN_CLZS,
22922
22923 /* FSGSBASE instructions. */
22924 IX86_BUILTIN_RDFSBASE32,
22925 IX86_BUILTIN_RDFSBASE64,
22926 IX86_BUILTIN_RDGSBASE32,
22927 IX86_BUILTIN_RDGSBASE64,
22928 IX86_BUILTIN_WRFSBASE32,
22929 IX86_BUILTIN_WRFSBASE64,
22930 IX86_BUILTIN_WRGSBASE32,
22931 IX86_BUILTIN_WRGSBASE64,
22932
22933 /* RDRND instructions. */
22934 IX86_BUILTIN_RDRAND16,
22935 IX86_BUILTIN_RDRAND32,
22936 IX86_BUILTIN_RDRAND64,
22937
22938 /* F16C instructions. */
22939 IX86_BUILTIN_CVTPH2PS,
22940 IX86_BUILTIN_CVTPH2PS256,
22941 IX86_BUILTIN_CVTPS2PH,
22942 IX86_BUILTIN_CVTPS2PH256,
22943
22944 IX86_BUILTIN_MAX
22945 };
22946
22947 /* Table for the ix86 builtin decls. */
22948 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
22949
22950 /* Table of all the builtin functions that are available for different ISAs
22951 but whose declarations are deferred until a function is declared that
22952 uses that ISA. */
22953 struct builtin_isa {
22954 const char *name; /* function name */
22955 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
22956 int isa; /* isa_flags this builtin is defined for */
22957 bool const_p; /* true if the declaration is constant */
22958 bool set_and_not_built_p; /* true if the decl is deferred and not yet built */
22959 };
22960
22961 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
22962
22963
22964 /* Add an ix86 target builtin function with the given MASK, NAME, TCODE
22965 and CODE. Save MASK (the isa_flags the builtin needs) in the
22966 ix86_builtins_isa array and store the function decl in the ix86_builtins
22967 array. Return the function decl, or NULL_TREE if the builtin was not added.
22968
22969 If the front end has a special hook for builtin functions, delay adding
22970 builtin functions that aren't in the current ISA until the ISA is changed
22971 with function specific optimization. Doing so can save about 300K for the
22972 default compiler. When such a builtin is expanded, we check at that time
22973 whether it is valid.
22974
22975 If the front end doesn't have a special hook, record all builtins, even
22976 those whose instruction set isn't in the current ISA, in case the user
22977 uses function specific options for a different ISA; this avoids scope
22978 errors if a builtin is added in the middle of a function scope. */
22979
22980 static inline tree
22981 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
22982 enum ix86_builtins code)
22983 {
22984 tree decl = NULL_TREE;
22985
22986 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
22987 {
22988 ix86_builtins_isa[(int) code].isa = mask;
22989
22990 mask &= ~OPTION_MASK_ISA_64BIT;
22991 if (mask == 0
22992 || (mask & ix86_isa_flags) != 0
22993 || (lang_hooks.builtin_function
22994 == lang_hooks.builtin_function_ext_scope))
22995
22996 {
22997 tree type = ix86_get_builtin_func_type (tcode);
22998 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
22999 NULL, NULL_TREE);
23000 ix86_builtins[(int) code] = decl;
23001 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
23002 }
23003 else
23004 {
23005 ix86_builtins[(int) code] = NULL_TREE;
23006 ix86_builtins_isa[(int) code].tcode = tcode;
23007 ix86_builtins_isa[(int) code].name = name;
23008 ix86_builtins_isa[(int) code].const_p = false;
23009 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
23010 }
23011 }
23012
23013 return decl;
23014 }
23015
23016 /* Like def_builtin, but also marks the function decl "const". */
23017
23018 static inline tree
23019 def_builtin_const (int mask, const char *name,
23020 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
23021 {
23022 tree decl = def_builtin (mask, name, tcode, code);
23023 if (decl)
23024 TREE_READONLY (decl) = 1;
23025 else
23026 ix86_builtins_isa[(int) code].const_p = true;
23027
23028 return decl;
23029 }
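/* Illustrative use (a hedged example; the real registration calls appear
   further down in this file, largely as loops over the bdesc_* tables):

	def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
			   V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);

   If SSE is not enabled when this runs, the builtin is merely recorded in
   ix86_builtins_isa and only built later by ix86_add_new_builtins, e.g.
   once a function that enables SSE via a target attribute is seen.  */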
23030
23031 /* Add any builtin functions for a given ISA that were requested but have
23032 not yet been declared. This saves a bit of space compared to adding
23033 every declaration to the tree whether or not it is used. */
23034
23035 static void
23036 ix86_add_new_builtins (int isa)
23037 {
23038 int i;
23039
23040 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
23041 {
23042 if ((ix86_builtins_isa[i].isa & isa) != 0
23043 && ix86_builtins_isa[i].set_and_not_built_p)
23044 {
23045 tree decl, type;
23046
23047 /* Don't define the builtin again. */
23048 ix86_builtins_isa[i].set_and_not_built_p = false;
23049
23050 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
23051 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
23052 type, i, BUILT_IN_MD, NULL,
23053 NULL_TREE);
23054
23055 ix86_builtins[i] = decl;
23056 if (ix86_builtins_isa[i].const_p)
23057 TREE_READONLY (decl) = 1;
23058 }
23059 }
23060 }
23061
23062 /* Bits for builtin_description.flag. */
23063
23064 /* Set when the comparison is not supported natively and the operands
23065 should be swapped (swap_comparison) in order to support it. */
23066 #define BUILTIN_DESC_SWAP_OPERANDS 1
23067
23068 struct builtin_description
23069 {
23070 const unsigned int mask;
23071 const enum insn_code icode;
23072 const char *const name;
23073 const enum ix86_builtins code;
23074 const enum rtx_code comparison;
23075 const int flag;
23076 };
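/* Illustrative reading of the tables below (added commentary): an entry
   such as

	{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
	  IX86_BUILTIN_COMIEQSS, UNEQ, 0 }

   in bdesc_comi says: when SSE is enabled, register __builtin_ia32_comieq
   as IX86_BUILTIN_COMIEQSS and expand it through the sse_comi pattern
   using the UNEQ comparison.  FLAG holds either the
   BUILTIN_DESC_SWAP_OPERANDS bit or, in the pcmp[ei]str and special-args
   tables, an (int)-cast CC mode or function type code.  */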
23077
23078 static const struct builtin_description bdesc_comi[] =
23079 {
23080 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
23081 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
23082 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
23083 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
23084 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
23085 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
23086 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
23087 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
23088 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
23089 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
23090 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
23091 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
23092 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
23093 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
23094 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
23095 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
23096 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
23097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
23098 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
23099 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
23100 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
23101 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
23102 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
23103 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
23104 };
23105
23106 static const struct builtin_description bdesc_pcmpestr[] =
23107 {
23108 /* SSE4.2 */
23109 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
23110 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
23111 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
23112 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
23113 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
23114 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
23115 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
23116 };
23117
23118 static const struct builtin_description bdesc_pcmpistr[] =
23119 {
23120 /* SSE4.2 */
23121 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
23122 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
23123 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
23124 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
23125 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
23126 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
23127 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
23128 };
23129
23130 /* Special builtins with variable number of arguments. */
23131 static const struct builtin_description bdesc_special_args[] =
23132 {
23133 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
23134 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
23135
23136 /* MMX */
23137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
23138
23139 /* 3DNow! */
23140 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
23141
23142 /* SSE */
23143 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
23144 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
23145 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
23146
23147 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
23148 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
23149 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
23150 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
23151
23152 /* SSE or 3DNow!A */
23153 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
23154 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
23155
23156 /* SSE2 */
23157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
23158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
23159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
23160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
23161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
23162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
23163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
23164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
23165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
23166
23167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
23168 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
23169
23170 /* SSE3 */
23171 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
23172
23173 /* SSE4.1 */
23174 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
23175
23176 /* SSE4A */
23177 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
23178 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
23179
23180 /* AVX */
23181 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
23182 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
23183
23184 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
23185 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
23186 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
23187 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
23188 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
23189
23190 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
23191 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
23192 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
23193 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
23194 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
23195 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
23196 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
23197
23198 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
23199 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
23200 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
23201
23202 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
23203 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
23204 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
23205 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
23206 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
23207 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
23208 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
23209 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
23210
23211 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
23212 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
23213 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
23214 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
23215 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
23216 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
23217
23218 /* FSGSBASE */
23219 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
23220 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
23221 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
23222 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
23223 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
23224 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
23225 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
23226 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
23227
23228 /* RDRND */
23229 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
23230 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
23231 { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
23232 };
23233
23234 /* Builtins with variable number of arguments. */
23235 static const struct builtin_description bdesc_args[] =
23236 {
23237 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
23238 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
23239 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
23240 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
23241 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
23242 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
23243 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
23244
23245 /* MMX */
23246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23247 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23249 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23250 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23252
23253 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23254 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23255 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23256 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23257 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23258 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23259 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23260 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23261
23262 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23263 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23264
23265 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23266 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23267 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23268 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23269
23270 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23271 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23272 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23273 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23274 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23275 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23276
23277 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23278 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23279 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23280 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23281 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23282 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23283
23284 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
23285 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
23286 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
23287
23288 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
23289
23290 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23291 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23292 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
23293 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23294 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23295 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
23296
23297 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23298 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23299 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
23300 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23301 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23302 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
23303
23304 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23305 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23306 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23307 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
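  /* Note: the *_SI_COUNT and *_V4HI_V4HI_COUNT style signatures mark the
     last operand as a shift count, which is handled specially when the
     builtin is expanded; e.g. __builtin_ia32_psllwi takes an integer
     count while __builtin_ia32_psllw takes the count in an MMX register,
     yet both expand through the same mmx_ashlv4hi3 pattern.  */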
23308
23309 /* 3DNow! */
23310 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
23311 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
23312 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23313 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23314
23315 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23316 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23317 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23318 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23319 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23320 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23321 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23322 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23323 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23324 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23325 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23326 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23327 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23328 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23329 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23330
23331 /* 3DNow!A */
23332 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
23333 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
23334 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
23335 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23336 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23337 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23338
23339 /* SSE */
23340 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
23341 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23342 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23344 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23345 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23346 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
23347 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
23348 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
23349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
23350 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
23351 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
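  /* Note: the *si64 conversions also carry OPTION_MASK_ISA_64BIT, since
     they produce a 64-bit integer in a general register and are only
     available on 64-bit targets.  */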
23352
23353 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23354
23355 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23356 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23357 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23358 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23359 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23360 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23361 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23362 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23363
23364 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
23365 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
23366 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
23367 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23368 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23369 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
23370 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
23371 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
23372 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
23373 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23374 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23375 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
23376 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
23377 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
23378 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
23379 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
23380 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
23381 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
23382 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
23383 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23384 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23385 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
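  /* Note on the compare entries: the RTL comparison code (EQ, LT, UNGE,
     ORDERED, ...) selects the CMPPS/CMPSS predicate, and the GT/GE
     style builtins reuse the LT/LE codes with a _SWAP signature, which
     tells the expander to exchange the two operands.  */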
23386
23387 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23388 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23389 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23390 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23391
23392 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23393 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23394 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23395 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23396
23397 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23398
23399 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23400 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23401 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23402 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23403 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23404
23405 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
23406 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
23407 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
23408
23409 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
23410
23411 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
23412 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
23413 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
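  /* Note: the _VEC_MERGE signatures cover the scalar sqrtss/rsqrtss/rcpss
     forms, which only replace the low element; the single source vector
     is also used as the merge operand when the insn is expanded, so the
     upper elements come back unchanged.  */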
23414
23415 /* SSE MMX or 3DNow!A */
23416 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23417 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23418 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23419
23420 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23421 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23422 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23423 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23424
23425 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
23426 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
23427
23428 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
23429
23430 /* SSE2 */
23431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23432
23433 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
23434 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
23435 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
23436 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
23437 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
23438 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
23439 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
23440 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
23441 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
23442 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
23443 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
23444 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
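  /* Note: the vec_perm entries use CODE_FOR_nothing because there is no
     single insn pattern for a variable permute; these builtins are
     presumably folded and expanded by dedicated code elsewhere in this
     file rather than through this table's generic expansion path.  */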
23445
23446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
23447 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
23448 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
23449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
23450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
23451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
23452
23453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
23454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
23455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
23456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
23457 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
23458
23459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
23460
23461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
23462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
23463 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
23464 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
23465
23466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
23467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
23468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
23469
23470 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23471 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23472 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23473 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23478
23479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
23480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
23481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
23482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
23483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
23484 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
23485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
23486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
23487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
23488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
23489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
23490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
23491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
23492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
23493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
23494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
23495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
23496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
23497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
23498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
23499
23500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23501 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23504
23505 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23507 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23508 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23509
23510 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23511
23512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23513 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23514 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23515
23516 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
23517
23518 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23519 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23520 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23521 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23522 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23523 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23524 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23525 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23526
23527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23535
23536 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23537 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23538
23539 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23541 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23542 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23543
23544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23546
23547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23553
23554 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23555 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23556 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23558
23559 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23560 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23561 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23562 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23563 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23564 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23565 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23566 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23567
23568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
23569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
23570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
23571
23572 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
23574
23575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
23576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
23577
23578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
23579
23580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
23581 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
23582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
23583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
23584
23585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
23586 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
23587 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
23588 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
23589 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
23590 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
23591 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
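  /* Note: pslldqi128 uses an _INT_CONVERT signature because the
     sse2_ashlv1ti3 pattern works on V1TImode while the builtin's
     argument and result are V2DI, so the expander has to convert the
     operands between the two modes; psrldqi128 below is analogous.  */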
23592
23593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
23594 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
23595 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
23596 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
23597 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
23598 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
23599 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
23600
23601 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
23602 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
23603 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
23604 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
23605
23606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
23607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
23608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
23609
23610 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
23611
23612 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
23613 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
23614
23615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
23616
23617 /* SSE2 MMX */
23618 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
23619 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
23620
23621 /* SSE3 */
23622 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23623 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23624
23625 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23626 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23627 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23628 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23629 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23630 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23631
23632 /* SSSE3 */
23633 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
23634 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
23635 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
23636 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
23637 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
23638 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
23639
23640 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23641 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23642 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23643 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23644 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23645 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23646 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23647 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23648 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23649 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23650 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23651 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23652 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
23653 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
23654 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23655 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23656 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23657 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23658 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23659 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23660 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23661 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23662 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23663 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23664
23665 /* SSSE3. */
23666 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
23667 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
23668
23669 /* SSE4.1 */
23670 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23671 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23672 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
23673 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
23674 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23675 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23676 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23677 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
23678 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
23679 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
23680
23681 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
23682 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
23683 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
23684 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
23685 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
23686 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
23687 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
23688 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
23689 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
23690 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
23691 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
23692 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
23693 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
23694
23695 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
23696 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23697 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23698 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23699 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23700 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23701 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23702 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23703 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23704 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23705 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
23706 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23707
23708 /* SSE4.1 */
23709 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
23710 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
23711 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23712 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23713
23714 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
23715 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
23716 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
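  /* Note: the three ptest builtins share one insn; the comparison code
     selects which flag the result is read from -- EQ for ZF (ptestz),
     LTU for CF (ptestc), and GTU for the "neither flag set" case
     (ptestnzc).  */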
23717
23718 /* SSE4.2 */
23719 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23720 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
23721 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
23722 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
23723 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
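  /* Note: the crc32 builtins are enabled by either the SSE4.2 or the
     CRC32 ISA flag (the mask is a set of alternatives, cf. the
     "SSE MMX or 3DNow!A" group above), and the DI variant is further
     limited to 64-bit targets by OPTION_MASK_ISA_64BIT.  */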
23724
23725 /* SSE4A */
23726 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
23727 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
23728 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
23729 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23730
23731 /* AES */
23732 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
23733 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
23734
23735 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23736 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23737 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23738 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23739
23740 /* PCLMUL */
23741 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
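  /* Note: the AES and PCLMUL entries above carry a zero in the name slot,
     so no builtin is registered from this table for them; they only
     supply the expansion information, with the builtins themselves
     presumably declared elsewhere under their own ISA flags.  */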
23742
23743 /* AVX */
23744 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23745 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23748 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23749 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23752 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23758 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23759 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23760 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23761 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23762 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23763 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23764 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23765 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23766 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23767 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23768 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23769 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23770
23771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
23772 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
23773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
23774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
23775
23776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
23779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
23780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
23790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
23791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
23792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
23793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
23794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
23795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
23796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
23797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
23798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
23799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
23800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
23803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
23804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
23805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
23806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
23807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
23808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
23809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
23810
23811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
23814
23815 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
23816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23817 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23819 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23820
23821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23822
23823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
23824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
23825
23826 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23830
23831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
23832 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
23833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
23834 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
23835 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
23836 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
23837
23838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
23839 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
23840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
23841 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
23842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
23843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
23844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
23845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
23846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
23847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23853
23854 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
23855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
23856
23857 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
23858
23859 /* F16C */
23860 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
23861 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
23862 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
23863 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
23864 };
23865
23866 /* FMA4 and XOP. */
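/* The MULTI_ARG_* macros below are shorthand for the ix86_builtin_func_type
   enumerators used by bdesc_multi_arg: the leading digit is the operand
   count and the suffix names the vector element mode.  The _IMM forms take
   a constant last operand, the _CMP forms make the expander build an
   explicit comparison rtx from the sub-code, and the _TF forms pass the
   sub-code through as an immediate (see ix86_expand_multi_arg_builtin).  */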
23867 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
23868 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
23869 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
23870 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
23871 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
23872 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
23873 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
23874 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
23875 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
23876 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
23877 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
23878 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
23879 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
23880 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
23881 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
23882 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
23883 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
23884 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
23885 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
23886 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
23887 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
23888 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
23889 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
23890 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
23891 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
23892 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
23893 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
23894 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
23895 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
23896 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
23897 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
23898 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
23899 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
23900 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
23901 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
23902 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
23903 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
23904 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
23905 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
23906 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
23907 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
23908 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
23909 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
23910 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
23911 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
23912 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
23913 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
23914 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
23915 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
23916 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
23917 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
23918 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
23919
23920 static const struct builtin_description bdesc_multi_arg[] =
23921 {
23922 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23923 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23924 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23925 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23926 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23927 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23928 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23929 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23930
23931 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23932 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23933 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23934 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23935 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23936 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23937 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23938 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23939
23940 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23941 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23942 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23943 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23944
23945 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23946 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23947 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23948 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23949
23950 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23951 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23952 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23953 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23954
23955 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23956 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23957 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23958 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23959
23960 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
23961 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
23962 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
23963 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
23964 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
23965 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
23966 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
23967
23968 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
23969 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
23970 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
23971 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
23972 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
23973 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23974 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23975
23976 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
23977
23978 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
23979 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
23980 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23981 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23982 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
23983 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
23984 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23985 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23986 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23987 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23988 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23989 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23990
23991 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23992 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
23993 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
23994 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
23995 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
23996 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
23997 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
23998 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
23999 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
24000 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
24001 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
24002 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
24003 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
24004 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
24005 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
24006 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
24007
24008 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
24009 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
24010 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
24011 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
24012 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
24013 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
24014
24015 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
24016 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
24017 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
24018 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
24019 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
24020 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
24021 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
24022 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
24023 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
24024 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
24025 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
24026 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
24027 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
24028 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
24029 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
24030
24031 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
24032 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
24033 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
24034 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
24035 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
24036 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
24037 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
24038
24039 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
24040 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
24041 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
24042 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
24043 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
24044 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
24045 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
24046
24047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
24048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
24049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
24050 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
24051 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
24052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
24053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
24054
24055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
24056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
24057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
24058 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
24059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
24060 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
24061 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
24062
24063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
24064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
24065 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
24066 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
24067 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
24068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
24069 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
24070
24071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
24072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
24073 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
24074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
24075 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
24076 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
24077 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
24078
24079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
24080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
24081 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
24082 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
24083 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
24084 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
24085 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
24086
24087 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
24088 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
24089 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
24090 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
24091 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
24092 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
24093 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
24094
24095 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
24096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
24097 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
24098 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
24099 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
24100 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
24101 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
24102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
24103
24104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
24105 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
24106 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
24107 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
24108 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
24109 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
24110 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
24111 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
24112
24113 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
24114 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
24115 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
24116 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
24117
24118 };
24119
24120 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
24121    not in the current target ISA, so that the user can compile particular
24122    modules with target-specific options that differ from the command-line
24123    options.  */
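/* For example (an illustrative user translation unit, not part of this
   file), a single function can opt into AVX with the target attribute even
   when the rest of the module is built without -mavx, which is why the AVX
   builtins above must already be registered:

     typedef double __v4df __attribute__ ((vector_size (32)));

     __attribute__ ((target ("avx")))
     __v4df add256 (__v4df a, __v4df b)
     {
       return __builtin_ia32_addpd256 (a, b);
     }
*/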
24124 static void
24125 ix86_init_mmx_sse_builtins (void)
24126 {
24127 const struct builtin_description * d;
24128 enum ix86_builtin_func_type ftype;
24129 size_t i;
24130
24131 /* Add all special builtins with variable number of operands. */
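/* Each table entry supplies the ISA mask that gates the builtin, the insn
   code, the builtin's name and enum value, and (in the flag field) the
   ix86_builtin_func_type describing its signature; entries with a null
   name are placeholders and are skipped.  */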
24132 for (i = 0, d = bdesc_special_args;
24133 i < ARRAY_SIZE (bdesc_special_args);
24134 i++, d++)
24135 {
24136 if (d->name == 0)
24137 continue;
24138
24139 ftype = (enum ix86_builtin_func_type) d->flag;
24140 def_builtin (d->mask, d->name, ftype, d->code);
24141 }
24142
24143 /* Add all builtins with variable number of operands. */
24144 for (i = 0, d = bdesc_args;
24145 i < ARRAY_SIZE (bdesc_args);
24146 i++, d++)
24147 {
24148 if (d->name == 0)
24149 continue;
24150
24151 ftype = (enum ix86_builtin_func_type) d->flag;
24152 def_builtin_const (d->mask, d->name, ftype, d->code);
24153 }
24154
24155 /* pcmpestr[im] insns. */
24156 for (i = 0, d = bdesc_pcmpestr;
24157 i < ARRAY_SIZE (bdesc_pcmpestr);
24158 i++, d++)
24159 {
24160 if (d->code == IX86_BUILTIN_PCMPESTRM128)
24161 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
24162 else
24163 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
24164 def_builtin_const (d->mask, d->name, ftype, d->code);
24165 }
24166
24167 /* pcmpistr[im] insns. */
24168 for (i = 0, d = bdesc_pcmpistr;
24169 i < ARRAY_SIZE (bdesc_pcmpistr);
24170 i++, d++)
24171 {
24172 if (d->code == IX86_BUILTIN_PCMPISTRM128)
24173 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
24174 else
24175 ftype = INT_FTYPE_V16QI_V16QI_INT;
24176 def_builtin_const (d->mask, d->name, ftype, d->code);
24177 }
24178
24179 /* comi/ucomi insns. */
24180 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24181 {
24182 if (d->mask == OPTION_MASK_ISA_SSE2)
24183 ftype = INT_FTYPE_V2DF_V2DF;
24184 else
24185 ftype = INT_FTYPE_V4SF_V4SF;
24186 def_builtin_const (d->mask, d->name, ftype, d->code);
24187 }
24188
24189 /* SSE */
24190 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
24191 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
24192 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
24193 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
24194
24195 /* SSE or 3DNow!A */
24196 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24197 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
24198 IX86_BUILTIN_MASKMOVQ);
24199
24200 /* SSE2 */
24201 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
24202 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
24203
24204 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
24205 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
24206 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
24207 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
24208
24209 /* SSE3. */
24210 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
24211 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
24212 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
24213 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
24214
24215 /* AES */
24216 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
24217 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
24218 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
24219 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
24220 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
24221 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
24222 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
24223 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
24224 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
24225 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
24226 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
24227 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
24228
24229 /* PCLMUL */
24230 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
24231 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
24232
24233 /* MMX access to the vec_init patterns. */
24234 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
24235 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
24236
24237 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
24238 V4HI_FTYPE_HI_HI_HI_HI,
24239 IX86_BUILTIN_VEC_INIT_V4HI);
24240
24241 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
24242 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
24243 IX86_BUILTIN_VEC_INIT_V8QI);
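/* Illustrative use from a user translation unit (an assumption for the sake
   of example, not part of this file; MMX must be enabled) -- the builtin
   takes the scalar elements and returns the corresponding MMX vector:

     typedef int __v2si __attribute__ ((vector_size (8)));
     __v2si v = __builtin_ia32_vec_init_v2si (1, 2);
*/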
24244
24245 /* Access to the vec_extract patterns. */
24246 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
24247 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
24248 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
24249 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
24250 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
24251 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
24252 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
24253 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
24254 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
24255 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
24256
24257 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24258 "__builtin_ia32_vec_ext_v4hi",
24259 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
24260
24261 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
24262 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
24263
24264 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
24265 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
24266
24267 /* Access to the vec_set patterns. */
24268 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
24269 "__builtin_ia32_vec_set_v2di",
24270 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
24271
24272 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
24273 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
24274
24275 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
24276 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
24277
24278 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
24279 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
24280
24281 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24282 "__builtin_ia32_vec_set_v4hi",
24283 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
24284
24285 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
24286 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
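/* Sketch of how the extract/set builtins pair up in user code (illustrative
   only; vec_ext_v4sf needs SSE, vec_set_v4sf needs SSE4.1, and the element
   index is expected to be a constant):

     typedef float __v4sf __attribute__ ((vector_size (16)));
     __v4sf v = { 1.0f, 2.0f, 3.0f, 4.0f };
     float lane0 = __builtin_ia32_vec_ext_v4sf (v, 0);
     __v4sf w = __builtin_ia32_vec_set_v4sf (v, 42.0f, 0);
*/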
24287
24288 /* Add the FMA4 and XOP multi-arg builtin instructions.  */
24289 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24290 {
24291 if (d->name == 0)
24292 continue;
24293
24294 ftype = (enum ix86_builtin_func_type) d->flag;
24295 def_builtin_const (d->mask, d->name, ftype, d->code);
24296 }
24297 }
24298
24299 /* Internal helper for ix86_init_builtins: define the ms_abi and sysv_abi
      va_list builtins on 64-bit targets.  */
24300
24301 static void
24302 ix86_init_builtins_va_builtins_abi (void)
24303 {
24304 tree ms_va_ref, sysv_va_ref;
24305 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
24306 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
24307 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
24308 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
24309
24310 if (!TARGET_64BIT)
24311 return;
24312 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
24313 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
24314 ms_va_ref = build_reference_type (ms_va_list_type_node);
24315 sysv_va_ref =
24316 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
24317
24318 fnvoid_va_end_ms =
24319 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24320 fnvoid_va_start_ms =
24321 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24322 fnvoid_va_end_sysv =
24323 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
24324 fnvoid_va_start_sysv =
24325 build_varargs_function_type_list (void_type_node, sysv_va_ref,
24326 NULL_TREE);
24327 fnvoid_va_copy_ms =
24328 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
24329 NULL_TREE);
24330 fnvoid_va_copy_sysv =
24331 build_function_type_list (void_type_node, sysv_va_ref,
24332 sysv_va_ref, NULL_TREE);
24333
24334 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
24335 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
24336 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
24337 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
24338 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
24339 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
24340 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
24341 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24342 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
24343 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24344 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
24345 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24346 }
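
/* A minimal usage sketch (illustrative only, 64-bit targets): a variadic
   function declared with __attribute__ ((ms_abi)) walks its arguments with

     __builtin_ms_va_list ap;
     __builtin_ms_va_start (ap, last_named_arg);
     ...
     __builtin_ms_va_end (ap);

   and __builtin_sysv_va_start/__builtin_sysv_va_end work analogously for
   sysv_abi functions.  */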
24347
24348 static void
24349 ix86_init_builtin_types (void)
24350 {
24351 tree float128_type_node, float80_type_node;
24352
24353 /* The __float80 type. */
24354 float80_type_node = long_double_type_node;
24355 if (TYPE_MODE (float80_type_node) != XFmode)
24356 {
24357 /* long double does not have XFmode here, so build a separate 80-bit
         REAL_TYPE for __float80.  */
24358 float80_type_node = make_node (REAL_TYPE);
24359
24360 TYPE_PRECISION (float80_type_node) = 80;
24361 layout_type (float80_type_node);
24362 }
24363 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
24364
24365 /* The __float128 type. */
24366 float128_type_node = make_node (REAL_TYPE);
24367 TYPE_PRECISION (float128_type_node) = 128;
24368 layout_type (float128_type_node);
24369 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
24370
24371 /* This macro is built by i386-builtin-types.awk. */
24372 DEFINE_BUILTIN_PRIMITIVE_TYPES;
24373 }
24374
24375 static void
24376 ix86_init_builtins (void)
24377 {
24378 tree t;
24379
24380 ix86_init_builtin_types ();
24381
24382 /* TFmode support builtins. */
24383 def_builtin_const (0, "__builtin_infq",
24384 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
24385 def_builtin_const (0, "__builtin_huge_valq",
24386 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
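/* Illustrative user-level use of the TFmode builtins (not part of this
   file): __builtin_infq and __builtin_huge_valq return a __float128
   infinity, and __builtin_fabsq/__builtin_copysignq below operate on
   __float128 values, e.g.

     __float128 x = __builtin_fabsq (-__builtin_infq ());
*/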
24387
24388 /* We will expand them into normal calls if SSE2 isn't available, since
24389    they are used by libgcc.  */
24390 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
24391 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
24392 BUILT_IN_MD, "__fabstf2", NULL_TREE);
24393 TREE_READONLY (t) = 1;
24394 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
24395
24396 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
24397 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
24398 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
24399 TREE_READONLY (t) = 1;
24400 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
24401
24402 ix86_init_mmx_sse_builtins ();
24403
24404 if (TARGET_64BIT)
24405 ix86_init_builtins_va_builtins_abi ();
24406 }
24407
24408 /* Return the ix86 builtin for CODE. */
24409
24410 static tree
24411 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24412 {
24413 if (code >= IX86_BUILTIN_MAX)
24414 return error_mark_node;
24415
24416 return ix86_builtins[code];
24417 }
24418
24419 /* Errors in the source file can cause expand_expr to return const0_rtx
24420 where we expect a vector. To avoid crashing, use one of the vector
24421 clear instructions. */
24422 static rtx
24423 safe_vector_operand (rtx x, enum machine_mode mode)
24424 {
24425 if (x == const0_rtx)
24426 x = CONST0_RTX (mode);
24427 return x;
24428 }
24429
24430 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
24431
24432 static rtx
24433 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
24434 {
24435 rtx pat;
24436 tree arg0 = CALL_EXPR_ARG (exp, 0);
24437 tree arg1 = CALL_EXPR_ARG (exp, 1);
24438 rtx op0 = expand_normal (arg0);
24439 rtx op1 = expand_normal (arg1);
24440 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24441 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24442 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24443
24444 if (VECTOR_MODE_P (mode0))
24445 op0 = safe_vector_operand (op0, mode0);
24446 if (VECTOR_MODE_P (mode1))
24447 op1 = safe_vector_operand (op1, mode1);
24448
24449 if (optimize || !target
24450 || GET_MODE (target) != tmode
24451 || !insn_data[icode].operand[0].predicate (target, tmode))
24452 target = gen_reg_rtx (tmode);
24453
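/* The insn wants a TImode operand but the builtin supplied an SImode
   value (e.g. a shift count); widen it through a V4SI register and use
   the TImode low part.  */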
24454 if (GET_MODE (op1) == SImode && mode1 == TImode)
24455 {
24456 rtx x = gen_reg_rtx (V4SImode);
24457 emit_insn (gen_sse2_loadd (x, op1));
24458 op1 = gen_lowpart (TImode, x);
24459 }
24460
24461 if (!insn_data[icode].operand[1].predicate (op0, mode0))
24462 op0 = copy_to_mode_reg (mode0, op0);
24463 if (!insn_data[icode].operand[2].predicate (op1, mode1))
24464 op1 = copy_to_mode_reg (mode1, op1);
24465
24466 pat = GEN_FCN (icode) (target, op0, op1);
24467 if (! pat)
24468 return 0;
24469
24470 emit_insn (pat);
24471
24472 return target;
24473 }
24474
24475 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
24476
24477 static rtx
24478 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
24479 enum ix86_builtin_func_type m_type,
24480 enum rtx_code sub_code)
24481 {
24482 rtx pat;
24483 int i;
24484 int nargs;
24485 bool comparison_p = false;
24486 bool tf_p = false;
24487 bool last_arg_constant = false;
24488 int num_memory = 0;
24489 struct {
24490 rtx op;
24491 enum machine_mode mode;
24492 } args[4];
24493
24494 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24495
24496 switch (m_type)
24497 {
24498 case MULTI_ARG_4_DF2_DI_I:
24499 case MULTI_ARG_4_DF2_DI_I1:
24500 case MULTI_ARG_4_SF2_SI_I:
24501 case MULTI_ARG_4_SF2_SI_I1:
24502 nargs = 4;
24503 last_arg_constant = true;
24504 break;
24505
24506 case MULTI_ARG_3_SF:
24507 case MULTI_ARG_3_DF:
24508 case MULTI_ARG_3_SF2:
24509 case MULTI_ARG_3_DF2:
24510 case MULTI_ARG_3_DI:
24511 case MULTI_ARG_3_SI:
24512 case MULTI_ARG_3_SI_DI:
24513 case MULTI_ARG_3_HI:
24514 case MULTI_ARG_3_HI_SI:
24515 case MULTI_ARG_3_QI:
24516 case MULTI_ARG_3_DI2:
24517 case MULTI_ARG_3_SI2:
24518 case MULTI_ARG_3_HI2:
24519 case MULTI_ARG_3_QI2:
24520 nargs = 3;
24521 break;
24522
24523 case MULTI_ARG_2_SF:
24524 case MULTI_ARG_2_DF:
24525 case MULTI_ARG_2_DI:
24526 case MULTI_ARG_2_SI:
24527 case MULTI_ARG_2_HI:
24528 case MULTI_ARG_2_QI:
24529 nargs = 2;
24530 break;
24531
24532 case MULTI_ARG_2_DI_IMM:
24533 case MULTI_ARG_2_SI_IMM:
24534 case MULTI_ARG_2_HI_IMM:
24535 case MULTI_ARG_2_QI_IMM:
24536 nargs = 2;
24537 last_arg_constant = true;
24538 break;
24539
24540 case MULTI_ARG_1_SF:
24541 case MULTI_ARG_1_DF:
24542 case MULTI_ARG_1_SF2:
24543 case MULTI_ARG_1_DF2:
24544 case MULTI_ARG_1_DI:
24545 case MULTI_ARG_1_SI:
24546 case MULTI_ARG_1_HI:
24547 case MULTI_ARG_1_QI:
24548 case MULTI_ARG_1_SI_DI:
24549 case MULTI_ARG_1_HI_DI:
24550 case MULTI_ARG_1_HI_SI:
24551 case MULTI_ARG_1_QI_DI:
24552 case MULTI_ARG_1_QI_SI:
24553 case MULTI_ARG_1_QI_HI:
24554 nargs = 1;
24555 break;
24556
24557 case MULTI_ARG_2_DI_CMP:
24558 case MULTI_ARG_2_SI_CMP:
24559 case MULTI_ARG_2_HI_CMP:
24560 case MULTI_ARG_2_QI_CMP:
24561 nargs = 2;
24562 comparison_p = true;
24563 break;
24564
24565 case MULTI_ARG_2_SF_TF:
24566 case MULTI_ARG_2_DF_TF:
24567 case MULTI_ARG_2_DI_TF:
24568 case MULTI_ARG_2_SI_TF:
24569 case MULTI_ARG_2_HI_TF:
24570 case MULTI_ARG_2_QI_TF:
24571 nargs = 2;
24572 tf_p = true;
24573 break;
24574
24575 default:
24576 gcc_unreachable ();
24577 }
24578
24579 if (optimize || !target
24580 || GET_MODE (target) != tmode
24581 || !insn_data[icode].operand[0].predicate (target, tmode))
24582 target = gen_reg_rtx (tmode);
24583
24584 gcc_assert (nargs <= 4);
24585
24586 for (i = 0; i < nargs; i++)
24587 {
24588 tree arg = CALL_EXPR_ARG (exp, i);
24589 rtx op = expand_normal (arg);
24590 int adjust = (comparison_p) ? 1 : 0;
24591 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24592
24593 if (last_arg_constant && i == nargs-1)
24594 {
24595 if (!CONST_INT_P (op))
24596 {
24597 error ("last argument must be an immediate");
24598 return gen_reg_rtx (tmode);
24599 }
24600 }
24601 else
24602 {
24603 if (VECTOR_MODE_P (mode))
24604 op = safe_vector_operand (op, mode);
24605
24606 /* If we aren't optimizing, only allow one memory operand to be
24607 generated. */
24608 if (memory_operand (op, mode))
24609 num_memory++;
24610
24611 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24612
24613 if (optimize
24614 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
24615 || num_memory > 1)
24616 op = force_reg (mode, op);
24617 }
24618
24619 args[i].op = op;
24620 args[i].mode = mode;
24621 }
24622
24623 switch (nargs)
24624 {
24625 case 1:
24626 pat = GEN_FCN (icode) (target, args[0].op);
24627 break;
24628
24629 case 2:
24630 if (tf_p)
24631 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24632 GEN_INT ((int)sub_code));
24633 else if (! comparison_p)
24634 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24635 else
24636 {
24637 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24638 args[0].op,
24639 args[1].op);
24640
24641 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24642 }
24643 break;
24644
24645 case 3:
24646 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24647 break;
24648
24649 case 4:
24650 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
24651 break;
24652
24653 default:
24654 gcc_unreachable ();
24655 }
24656
24657 if (! pat)
24658 return 0;
24659
24660 emit_insn (pat);
24661 return target;
24662 }
24663
24664 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24665 insns with vec_merge. */
24666
24667 static rtx
24668 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24669 rtx target)
24670 {
24671 rtx pat;
24672 tree arg0 = CALL_EXPR_ARG (exp, 0);
24673 rtx op1, op0 = expand_normal (arg0);
24674 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24675 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24676
24677 if (optimize || !target
24678 || GET_MODE (target) != tmode
24679 || !insn_data[icode].operand[0].predicate (target, tmode))
24680 target = gen_reg_rtx (tmode);
24681
24682 if (VECTOR_MODE_P (mode0))
24683 op0 = safe_vector_operand (op0, mode0);
24684
24685 if ((optimize && !register_operand (op0, mode0))
24686 || !insn_data[icode].operand[1].predicate (op0, mode0))
24687 op0 = copy_to_mode_reg (mode0, op0);
24688
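  /* The scalar unop patterns use the source twice: once as the operand of
     the operation and once, via vec_merge, to supply the untouched upper
     elements.  */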
24689 op1 = op0;
24690 if (!insn_data[icode].operand[2].predicate (op1, mode0))
24691 op1 = copy_to_mode_reg (mode0, op1);
24692
24693 pat = GEN_FCN (icode) (target, op0, op1);
24694 if (! pat)
24695 return 0;
24696 emit_insn (pat);
24697 return target;
24698 }
24699
24700 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
24701
24702 static rtx
24703 ix86_expand_sse_compare (const struct builtin_description *d,
24704 tree exp, rtx target, bool swap)
24705 {
24706 rtx pat;
24707 tree arg0 = CALL_EXPR_ARG (exp, 0);
24708 tree arg1 = CALL_EXPR_ARG (exp, 1);
24709 rtx op0 = expand_normal (arg0);
24710 rtx op1 = expand_normal (arg1);
24711 rtx op2;
24712 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24713 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24714 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24715 enum rtx_code comparison = d->comparison;
24716
24717 if (VECTOR_MODE_P (mode0))
24718 op0 = safe_vector_operand (op0, mode0);
24719 if (VECTOR_MODE_P (mode1))
24720 op1 = safe_vector_operand (op1, mode1);
24721
24722 /* Swap operands if we have a comparison that isn't available in
24723 hardware. */
24724 if (swap)
24725 {
24726 rtx tmp = gen_reg_rtx (mode1);
24727 emit_move_insn (tmp, op1);
24728 op1 = op0;
24729 op0 = tmp;
24730 }
24731
24732 if (optimize || !target
24733 || GET_MODE (target) != tmode
24734 || !insn_data[d->icode].operand[0].predicate (target, tmode))
24735 target = gen_reg_rtx (tmode);
24736
24737 if ((optimize && !register_operand (op0, mode0))
24738 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
24739 op0 = copy_to_mode_reg (mode0, op0);
24740 if ((optimize && !register_operand (op1, mode1))
24741 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
24742 op1 = copy_to_mode_reg (mode1, op1);
24743
24744 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24745 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24746 if (! pat)
24747 return 0;
24748 emit_insn (pat);
24749 return target;
24750 }
24751
24752 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
24753
24754 static rtx
24755 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24756 rtx target)
24757 {
24758 rtx pat;
24759 tree arg0 = CALL_EXPR_ARG (exp, 0);
24760 tree arg1 = CALL_EXPR_ARG (exp, 1);
24761 rtx op0 = expand_normal (arg0);
24762 rtx op1 = expand_normal (arg1);
24763 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24764 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24765 enum rtx_code comparison = d->comparison;
24766
24767 if (VECTOR_MODE_P (mode0))
24768 op0 = safe_vector_operand (op0, mode0);
24769 if (VECTOR_MODE_P (mode1))
24770 op1 = safe_vector_operand (op1, mode1);
24771
24772 /* Swap operands if we have a comparison that isn't available in
24773 hardware. */
24774 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24775 {
24776 rtx tmp = op1;
24777 op1 = op0;
24778 op0 = tmp;
24779 }
24780
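  /* The comi insn only sets the flags; materialize the comparison result
     as 0 or 1 in the low byte of a zeroed SImode pseudo.  */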
24781 target = gen_reg_rtx (SImode);
24782 emit_move_insn (target, const0_rtx);
24783 target = gen_rtx_SUBREG (QImode, target, 0);
24784
24785 if ((optimize && !register_operand (op0, mode0))
24786 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24787 op0 = copy_to_mode_reg (mode0, op0);
24788 if ((optimize && !register_operand (op1, mode1))
24789 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24790 op1 = copy_to_mode_reg (mode1, op1);
24791
24792 pat = GEN_FCN (d->icode) (op0, op1);
24793 if (! pat)
24794 return 0;
24795 emit_insn (pat);
24796 emit_insn (gen_rtx_SET (VOIDmode,
24797 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24798 gen_rtx_fmt_ee (comparison, QImode,
24799 SET_DEST (pat),
24800 const0_rtx)));
24801
24802 return SUBREG_REG (target);
24803 }
24804
24805 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24806
24807 static rtx
24808 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24809 rtx target)
24810 {
24811 rtx pat;
24812 tree arg0 = CALL_EXPR_ARG (exp, 0);
24813 tree arg1 = CALL_EXPR_ARG (exp, 1);
24814 rtx op0 = expand_normal (arg0);
24815 rtx op1 = expand_normal (arg1);
24816 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24817 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24818 enum rtx_code comparison = d->comparison;
24819
24820 if (VECTOR_MODE_P (mode0))
24821 op0 = safe_vector_operand (op0, mode0);
24822 if (VECTOR_MODE_P (mode1))
24823 op1 = safe_vector_operand (op1, mode1);
24824
24825 target = gen_reg_rtx (SImode);
24826 emit_move_insn (target, const0_rtx);
24827 target = gen_rtx_SUBREG (QImode, target, 0);
24828
24829 if ((optimize && !register_operand (op0, mode0))
24830 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24831 op0 = copy_to_mode_reg (mode0, op0);
24832 if ((optimize && !register_operand (op1, mode1))
24833 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24834 op1 = copy_to_mode_reg (mode1, op1);
24835
24836 pat = GEN_FCN (d->icode) (op0, op1);
24837 if (! pat)
24838 return 0;
24839 emit_insn (pat);
24840 emit_insn (gen_rtx_SET (VOIDmode,
24841 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24842 gen_rtx_fmt_ee (comparison, QImode,
24843 SET_DEST (pat),
24844 const0_rtx)));
24845
24846 return SUBREG_REG (target);
24847 }
24848
24849 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24850
24851 static rtx
24852 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24853 tree exp, rtx target)
24854 {
24855 rtx pat;
24856 tree arg0 = CALL_EXPR_ARG (exp, 0);
24857 tree arg1 = CALL_EXPR_ARG (exp, 1);
24858 tree arg2 = CALL_EXPR_ARG (exp, 2);
24859 tree arg3 = CALL_EXPR_ARG (exp, 3);
24860 tree arg4 = CALL_EXPR_ARG (exp, 4);
24861 rtx scratch0, scratch1;
24862 rtx op0 = expand_normal (arg0);
24863 rtx op1 = expand_normal (arg1);
24864 rtx op2 = expand_normal (arg2);
24865 rtx op3 = expand_normal (arg3);
24866 rtx op4 = expand_normal (arg4);
24867 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24868
24869 tmode0 = insn_data[d->icode].operand[0].mode;
24870 tmode1 = insn_data[d->icode].operand[1].mode;
24871 modev2 = insn_data[d->icode].operand[2].mode;
24872 modei3 = insn_data[d->icode].operand[3].mode;
24873 modev4 = insn_data[d->icode].operand[4].mode;
24874 modei5 = insn_data[d->icode].operand[5].mode;
24875 modeimm = insn_data[d->icode].operand[6].mode;
24876
24877 if (VECTOR_MODE_P (modev2))
24878 op0 = safe_vector_operand (op0, modev2);
24879 if (VECTOR_MODE_P (modev4))
24880 op2 = safe_vector_operand (op2, modev4);
24881
24882 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24883 op0 = copy_to_mode_reg (modev2, op0);
24884 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
24885 op1 = copy_to_mode_reg (modei3, op1);
24886 if ((optimize && !register_operand (op2, modev4))
24887 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
24888 op2 = copy_to_mode_reg (modev4, op2);
24889 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
24890 op3 = copy_to_mode_reg (modei5, op3);
24891
24892 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
24893 {
24894 	  error ("the fifth argument must be an 8-bit immediate");
24895 return const0_rtx;
24896 }
24897
24898 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24899 {
24900 if (optimize || !target
24901 || GET_MODE (target) != tmode0
24902 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24903 target = gen_reg_rtx (tmode0);
24904
24905 scratch1 = gen_reg_rtx (tmode1);
24906
24907 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24908 }
24909 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24910 {
24911 if (optimize || !target
24912 || GET_MODE (target) != tmode1
24913 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24914 target = gen_reg_rtx (tmode1);
24915
24916 scratch0 = gen_reg_rtx (tmode0);
24917
24918 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24919 }
24920 else
24921 {
24922 gcc_assert (d->flag);
24923
24924 scratch0 = gen_reg_rtx (tmode0);
24925 scratch1 = gen_reg_rtx (tmode1);
24926
24927 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24928 }
24929
24930 if (! pat)
24931 return 0;
24932
24933 emit_insn (pat);
24934
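  /* For the flag-returning variants, d->flag holds the CC mode in which to
     read FLAGS_REG; test that flag and return the 0/1 result.  */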
24935 if (d->flag)
24936 {
24937 target = gen_reg_rtx (SImode);
24938 emit_move_insn (target, const0_rtx);
24939 target = gen_rtx_SUBREG (QImode, target, 0);
24940
24941 emit_insn
24942 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24943 gen_rtx_fmt_ee (EQ, QImode,
24944 gen_rtx_REG ((enum machine_mode) d->flag,
24945 FLAGS_REG),
24946 const0_rtx)));
24947 return SUBREG_REG (target);
24948 }
24949 else
24950 return target;
24951 }
24952
24953
24954 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24955
24956 static rtx
24957 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24958 tree exp, rtx target)
24959 {
24960 rtx pat;
24961 tree arg0 = CALL_EXPR_ARG (exp, 0);
24962 tree arg1 = CALL_EXPR_ARG (exp, 1);
24963 tree arg2 = CALL_EXPR_ARG (exp, 2);
24964 rtx scratch0, scratch1;
24965 rtx op0 = expand_normal (arg0);
24966 rtx op1 = expand_normal (arg1);
24967 rtx op2 = expand_normal (arg2);
24968 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24969
24970 tmode0 = insn_data[d->icode].operand[0].mode;
24971 tmode1 = insn_data[d->icode].operand[1].mode;
24972 modev2 = insn_data[d->icode].operand[2].mode;
24973 modev3 = insn_data[d->icode].operand[3].mode;
24974 modeimm = insn_data[d->icode].operand[4].mode;
24975
24976 if (VECTOR_MODE_P (modev2))
24977 op0 = safe_vector_operand (op0, modev2);
24978 if (VECTOR_MODE_P (modev3))
24979 op1 = safe_vector_operand (op1, modev3);
24980
24981 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24982 op0 = copy_to_mode_reg (modev2, op0);
24983 if ((optimize && !register_operand (op1, modev3))
24984 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
24985 op1 = copy_to_mode_reg (modev3, op1);
24986
24987 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
24988 {
24989 	  error ("the third argument must be an 8-bit immediate");
24990 return const0_rtx;
24991 }
24992
24993 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24994 {
24995 if (optimize || !target
24996 || GET_MODE (target) != tmode0
24997 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24998 target = gen_reg_rtx (tmode0);
24999
25000 scratch1 = gen_reg_rtx (tmode1);
25001
25002 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
25003 }
25004 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
25005 {
25006 if (optimize || !target
25007 || GET_MODE (target) != tmode1
25008 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
25009 target = gen_reg_rtx (tmode1);
25010
25011 scratch0 = gen_reg_rtx (tmode0);
25012
25013 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
25014 }
25015 else
25016 {
25017 gcc_assert (d->flag);
25018
25019 scratch0 = gen_reg_rtx (tmode0);
25020 scratch1 = gen_reg_rtx (tmode1);
25021
25022 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
25023 }
25024
25025 if (! pat)
25026 return 0;
25027
25028 emit_insn (pat);
25029
25030 if (d->flag)
25031 {
25032 target = gen_reg_rtx (SImode);
25033 emit_move_insn (target, const0_rtx);
25034 target = gen_rtx_SUBREG (QImode, target, 0);
25035
25036 emit_insn
25037 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25038 gen_rtx_fmt_ee (EQ, QImode,
25039 gen_rtx_REG ((enum machine_mode) d->flag,
25040 FLAGS_REG),
25041 const0_rtx)));
25042 return SUBREG_REG (target);
25043 }
25044 else
25045 return target;
25046 }
25047
25048 /* Subroutine of ix86_expand_builtin to take care of insns with
25049    a variable number of operands.  */
25050
25051 static rtx
25052 ix86_expand_args_builtin (const struct builtin_description *d,
25053 tree exp, rtx target)
25054 {
25055 rtx pat, real_target;
25056 unsigned int i, nargs;
25057 unsigned int nargs_constant = 0;
25058 int num_memory = 0;
25059 struct
25060 {
25061 rtx op;
25062 enum machine_mode mode;
25063 } args[4];
25064 bool last_arg_count = false;
25065 enum insn_code icode = d->icode;
25066 const struct insn_data_d *insn_p = &insn_data[icode];
25067 enum machine_mode tmode = insn_p->operand[0].mode;
25068 enum machine_mode rmode = VOIDmode;
25069 bool swap = false;
25070 enum rtx_code comparison = d->comparison;
25071
25072 switch ((enum ix86_builtin_func_type) d->flag)
25073 {
25074 case INT_FTYPE_V8SF_V8SF_PTEST:
25075 case INT_FTYPE_V4DI_V4DI_PTEST:
25076 case INT_FTYPE_V4DF_V4DF_PTEST:
25077 case INT_FTYPE_V4SF_V4SF_PTEST:
25078 case INT_FTYPE_V2DI_V2DI_PTEST:
25079 case INT_FTYPE_V2DF_V2DF_PTEST:
25080 return ix86_expand_sse_ptest (d, exp, target);
25081 case FLOAT128_FTYPE_FLOAT128:
25082 case FLOAT_FTYPE_FLOAT:
25083 case INT_FTYPE_INT:
25084 case UINT64_FTYPE_INT:
25085 case UINT16_FTYPE_UINT16:
25086 case INT64_FTYPE_INT64:
25087 case INT64_FTYPE_V4SF:
25088 case INT64_FTYPE_V2DF:
25089 case INT_FTYPE_V16QI:
25090 case INT_FTYPE_V8QI:
25091 case INT_FTYPE_V8SF:
25092 case INT_FTYPE_V4DF:
25093 case INT_FTYPE_V4SF:
25094 case INT_FTYPE_V2DF:
25095 case V16QI_FTYPE_V16QI:
25096 case V8SI_FTYPE_V8SF:
25097 case V8SI_FTYPE_V4SI:
25098 case V8HI_FTYPE_V8HI:
25099 case V8HI_FTYPE_V16QI:
25100 case V8QI_FTYPE_V8QI:
25101 case V8SF_FTYPE_V8SF:
25102 case V8SF_FTYPE_V8SI:
25103 case V8SF_FTYPE_V4SF:
25104 case V8SF_FTYPE_V8HI:
25105 case V4SI_FTYPE_V4SI:
25106 case V4SI_FTYPE_V16QI:
25107 case V4SI_FTYPE_V4SF:
25108 case V4SI_FTYPE_V8SI:
25109 case V4SI_FTYPE_V8HI:
25110 case V4SI_FTYPE_V4DF:
25111 case V4SI_FTYPE_V2DF:
25112 case V4HI_FTYPE_V4HI:
25113 case V4DF_FTYPE_V4DF:
25114 case V4DF_FTYPE_V4SI:
25115 case V4DF_FTYPE_V4SF:
25116 case V4DF_FTYPE_V2DF:
25117 case V4SF_FTYPE_V4SF:
25118 case V4SF_FTYPE_V4SI:
25119 case V4SF_FTYPE_V8SF:
25120 case V4SF_FTYPE_V4DF:
25121 case V4SF_FTYPE_V8HI:
25122 case V4SF_FTYPE_V2DF:
25123 case V2DI_FTYPE_V2DI:
25124 case V2DI_FTYPE_V16QI:
25125 case V2DI_FTYPE_V8HI:
25126 case V2DI_FTYPE_V4SI:
25127 case V2DF_FTYPE_V2DF:
25128 case V2DF_FTYPE_V4SI:
25129 case V2DF_FTYPE_V4DF:
25130 case V2DF_FTYPE_V4SF:
25131 case V2DF_FTYPE_V2SI:
25132 case V2SI_FTYPE_V2SI:
25133 case V2SI_FTYPE_V4SF:
25134 case V2SI_FTYPE_V2SF:
25135 case V2SI_FTYPE_V2DF:
25136 case V2SF_FTYPE_V2SF:
25137 case V2SF_FTYPE_V2SI:
25138 nargs = 1;
25139 break;
25140 case V4SF_FTYPE_V4SF_VEC_MERGE:
25141 case V2DF_FTYPE_V2DF_VEC_MERGE:
25142 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
25143 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
25144 case V16QI_FTYPE_V16QI_V16QI:
25145 case V16QI_FTYPE_V8HI_V8HI:
25146 case V8QI_FTYPE_V8QI_V8QI:
25147 case V8QI_FTYPE_V4HI_V4HI:
25148 case V8HI_FTYPE_V8HI_V8HI:
25149 case V8HI_FTYPE_V16QI_V16QI:
25150 case V8HI_FTYPE_V4SI_V4SI:
25151 case V8SF_FTYPE_V8SF_V8SF:
25152 case V8SF_FTYPE_V8SF_V8SI:
25153 case V4SI_FTYPE_V4SI_V4SI:
25154 case V4SI_FTYPE_V8HI_V8HI:
25155 case V4SI_FTYPE_V4SF_V4SF:
25156 case V4SI_FTYPE_V2DF_V2DF:
25157 case V4HI_FTYPE_V4HI_V4HI:
25158 case V4HI_FTYPE_V8QI_V8QI:
25159 case V4HI_FTYPE_V2SI_V2SI:
25160 case V4DF_FTYPE_V4DF_V4DF:
25161 case V4DF_FTYPE_V4DF_V4DI:
25162 case V4SF_FTYPE_V4SF_V4SF:
25163 case V4SF_FTYPE_V4SF_V4SI:
25164 case V4SF_FTYPE_V4SF_V2SI:
25165 case V4SF_FTYPE_V4SF_V2DF:
25166 case V4SF_FTYPE_V4SF_DI:
25167 case V4SF_FTYPE_V4SF_SI:
25168 case V2DI_FTYPE_V2DI_V2DI:
25169 case V2DI_FTYPE_V16QI_V16QI:
25170 case V2DI_FTYPE_V4SI_V4SI:
25171 case V2DI_FTYPE_V2DI_V16QI:
25172 case V2DI_FTYPE_V2DF_V2DF:
25173 case V2SI_FTYPE_V2SI_V2SI:
25174 case V2SI_FTYPE_V4HI_V4HI:
25175 case V2SI_FTYPE_V2SF_V2SF:
25176 case V2DF_FTYPE_V2DF_V2DF:
25177 case V2DF_FTYPE_V2DF_V4SF:
25178 case V2DF_FTYPE_V2DF_V2DI:
25179 case V2DF_FTYPE_V2DF_DI:
25180 case V2DF_FTYPE_V2DF_SI:
25181 case V2SF_FTYPE_V2SF_V2SF:
25182 case V1DI_FTYPE_V1DI_V1DI:
25183 case V1DI_FTYPE_V8QI_V8QI:
25184 case V1DI_FTYPE_V2SI_V2SI:
25185 if (comparison == UNKNOWN)
25186 return ix86_expand_binop_builtin (icode, exp, target);
25187 nargs = 2;
25188 break;
25189 case V4SF_FTYPE_V4SF_V4SF_SWAP:
25190 case V2DF_FTYPE_V2DF_V2DF_SWAP:
25191 gcc_assert (comparison != UNKNOWN);
25192 nargs = 2;
25193 swap = true;
25194 break;
25195 case V8HI_FTYPE_V8HI_V8HI_COUNT:
25196 case V8HI_FTYPE_V8HI_SI_COUNT:
25197 case V4SI_FTYPE_V4SI_V4SI_COUNT:
25198 case V4SI_FTYPE_V4SI_SI_COUNT:
25199 case V4HI_FTYPE_V4HI_V4HI_COUNT:
25200 case V4HI_FTYPE_V4HI_SI_COUNT:
25201 case V2DI_FTYPE_V2DI_V2DI_COUNT:
25202 case V2DI_FTYPE_V2DI_SI_COUNT:
25203 case V2SI_FTYPE_V2SI_V2SI_COUNT:
25204 case V2SI_FTYPE_V2SI_SI_COUNT:
25205 case V1DI_FTYPE_V1DI_V1DI_COUNT:
25206 case V1DI_FTYPE_V1DI_SI_COUNT:
25207 nargs = 2;
25208 last_arg_count = true;
25209 break;
25210 case UINT64_FTYPE_UINT64_UINT64:
25211 case UINT_FTYPE_UINT_UINT:
25212 case UINT_FTYPE_UINT_USHORT:
25213 case UINT_FTYPE_UINT_UCHAR:
25214 case UINT16_FTYPE_UINT16_INT:
25215 case UINT8_FTYPE_UINT8_INT:
25216 nargs = 2;
25217 break;
25218 case V2DI_FTYPE_V2DI_INT_CONVERT:
25219 nargs = 2;
25220 rmode = V1TImode;
25221 nargs_constant = 1;
25222 break;
25223 case V8HI_FTYPE_V8HI_INT:
25224 case V8HI_FTYPE_V8SF_INT:
25225 case V8HI_FTYPE_V4SF_INT:
25226 case V8SF_FTYPE_V8SF_INT:
25227 case V4SI_FTYPE_V4SI_INT:
25228 case V4SI_FTYPE_V8SI_INT:
25229 case V4HI_FTYPE_V4HI_INT:
25230 case V4DF_FTYPE_V4DF_INT:
25231 case V4SF_FTYPE_V4SF_INT:
25232 case V4SF_FTYPE_V8SF_INT:
25233 case V2DI_FTYPE_V2DI_INT:
25234 case V2DF_FTYPE_V2DF_INT:
25235 case V2DF_FTYPE_V4DF_INT:
25236 nargs = 2;
25237 nargs_constant = 1;
25238 break;
25239 case V16QI_FTYPE_V16QI_V16QI_V16QI:
25240 case V8SF_FTYPE_V8SF_V8SF_V8SF:
25241 case V4DF_FTYPE_V4DF_V4DF_V4DF:
25242 case V4SF_FTYPE_V4SF_V4SF_V4SF:
25243 case V2DF_FTYPE_V2DF_V2DF_V2DF:
25244 nargs = 3;
25245 break;
25246 case V16QI_FTYPE_V16QI_V16QI_INT:
25247 case V8HI_FTYPE_V8HI_V8HI_INT:
25248 case V8SI_FTYPE_V8SI_V8SI_INT:
25249 case V8SI_FTYPE_V8SI_V4SI_INT:
25250 case V8SF_FTYPE_V8SF_V8SF_INT:
25251 case V8SF_FTYPE_V8SF_V4SF_INT:
25252 case V4SI_FTYPE_V4SI_V4SI_INT:
25253 case V4DF_FTYPE_V4DF_V4DF_INT:
25254 case V4DF_FTYPE_V4DF_V2DF_INT:
25255 case V4SF_FTYPE_V4SF_V4SF_INT:
25256 case V2DI_FTYPE_V2DI_V2DI_INT:
25257 case V2DF_FTYPE_V2DF_V2DF_INT:
25258 nargs = 3;
25259 nargs_constant = 1;
25260 break;
25261 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
25262 nargs = 3;
25263 rmode = V2DImode;
25264 nargs_constant = 1;
25265 break;
25266 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
25267 nargs = 3;
25268 rmode = DImode;
25269 nargs_constant = 1;
25270 break;
25271 case V2DI_FTYPE_V2DI_UINT_UINT:
25272 nargs = 3;
25273 nargs_constant = 2;
25274 break;
25275 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
25276 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
25277 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
25278 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
25279 nargs = 4;
25280 nargs_constant = 1;
25281 break;
25282 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
25283 nargs = 4;
25284 nargs_constant = 2;
25285 break;
25286 default:
25287 gcc_unreachable ();
25288 }
25289
25290 gcc_assert (nargs <= ARRAY_SIZE (args));
25291
25292 if (comparison != UNKNOWN)
25293 {
25294 gcc_assert (nargs == 2);
25295 return ix86_expand_sse_compare (d, exp, target, swap);
25296 }
25297
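  /* If the mode RMODE required by the builtin's return type differs from
     the insn's result mode TMODE, allocate the result pseudo in RMODE and
     hand the insn a TMODE subreg of it.  */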
25298 if (rmode == VOIDmode || rmode == tmode)
25299 {
25300 if (optimize
25301 || target == 0
25302 || GET_MODE (target) != tmode
25303 || !insn_p->operand[0].predicate (target, tmode))
25304 target = gen_reg_rtx (tmode);
25305 real_target = target;
25306 }
25307 else
25308 {
25309 target = gen_reg_rtx (rmode);
25310 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
25311 }
25312
25313 for (i = 0; i < nargs; i++)
25314 {
25315 tree arg = CALL_EXPR_ARG (exp, i);
25316 rtx op = expand_normal (arg);
25317 enum machine_mode mode = insn_p->operand[i + 1].mode;
25318 bool match = insn_p->operand[i + 1].predicate (op, mode);
25319
25320 if (last_arg_count && (i + 1) == nargs)
25321 {
25322 	  /* SIMD shift insns take either an 8-bit immediate or a
25323 	     register as the count, but the builtin functions take an
25324 	     int.  If the count doesn't match, put it in a register.  */
25325 if (!match)
25326 {
25327 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
25328 if (!insn_p->operand[i + 1].predicate (op, mode))
25329 op = copy_to_reg (op);
25330 }
25331 }
25332 else if ((nargs - i) <= nargs_constant)
25333 {
25334 if (!match)
25335 switch (icode)
25336 {
25337 case CODE_FOR_sse4_1_roundpd:
25338 case CODE_FOR_sse4_1_roundps:
25339 case CODE_FOR_sse4_1_roundsd:
25340 case CODE_FOR_sse4_1_roundss:
25341 case CODE_FOR_sse4_1_blendps:
25342 case CODE_FOR_avx_blendpd256:
25343 case CODE_FOR_avx_vpermilv4df:
25344 case CODE_FOR_avx_roundpd256:
25345 case CODE_FOR_avx_roundps256:
25346 error ("the last argument must be a 4-bit immediate");
25347 return const0_rtx;
25348
25349 case CODE_FOR_sse4_1_blendpd:
25350 case CODE_FOR_avx_vpermilv2df:
25351 case CODE_FOR_xop_vpermil2v2df3:
25352 case CODE_FOR_xop_vpermil2v4sf3:
25353 case CODE_FOR_xop_vpermil2v4df3:
25354 case CODE_FOR_xop_vpermil2v8sf3:
25355 error ("the last argument must be a 2-bit immediate");
25356 return const0_rtx;
25357
25358 case CODE_FOR_avx_vextractf128v4df:
25359 case CODE_FOR_avx_vextractf128v8sf:
25360 case CODE_FOR_avx_vextractf128v8si:
25361 case CODE_FOR_avx_vinsertf128v4df:
25362 case CODE_FOR_avx_vinsertf128v8sf:
25363 case CODE_FOR_avx_vinsertf128v8si:
25364 error ("the last argument must be a 1-bit immediate");
25365 return const0_rtx;
25366
25367 case CODE_FOR_avx_cmpsdv2df3:
25368 case CODE_FOR_avx_cmpssv4sf3:
25369 case CODE_FOR_avx_cmppdv2df3:
25370 case CODE_FOR_avx_cmppsv4sf3:
25371 case CODE_FOR_avx_cmppdv4df3:
25372 case CODE_FOR_avx_cmppsv8sf3:
25373 error ("the last argument must be a 5-bit immediate");
25374 return const0_rtx;
25375
25376 default:
25377 switch (nargs_constant)
25378 {
25379 case 2:
25380 if ((nargs - i) == nargs_constant)
25381 {
25382 error ("the next to last argument must be an 8-bit immediate");
25383 break;
25384 }
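		  /* FALLTHRU */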
25385 case 1:
25386 error ("the last argument must be an 8-bit immediate");
25387 break;
25388 default:
25389 gcc_unreachable ();
25390 }
25391 return const0_rtx;
25392 }
25393 }
25394 else
25395 {
25396 if (VECTOR_MODE_P (mode))
25397 op = safe_vector_operand (op, mode);
25398
25399 /* If we aren't optimizing, only allow one memory operand to
25400 be generated. */
25401 if (memory_operand (op, mode))
25402 num_memory++;
25403
25404 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
25405 {
25406 if (optimize || !match || num_memory > 1)
25407 op = copy_to_mode_reg (mode, op);
25408 }
25409 else
25410 {
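	      /* The operand was expanded in some other mode; copy it into a
	         register and reinterpret it via a subreg of the mode the
	         insn expects.  */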
25411 op = copy_to_reg (op);
25412 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
25413 }
25414 }
25415
25416 args[i].op = op;
25417 args[i].mode = mode;
25418 }
25419
25420 switch (nargs)
25421 {
25422 case 1:
25423 pat = GEN_FCN (icode) (real_target, args[0].op);
25424 break;
25425 case 2:
25426 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
25427 break;
25428 case 3:
25429 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25430 args[2].op);
25431 break;
25432 case 4:
25433 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25434 args[2].op, args[3].op);
25435 break;
25436 default:
25437 gcc_unreachable ();
25438 }
25439
25440 if (! pat)
25441 return 0;
25442
25443 emit_insn (pat);
25444 return target;
25445 }
25446
25447 /* Subroutine of ix86_expand_builtin to take care of special insns
25448    with a variable number of operands.  */
25449
25450 static rtx
25451 ix86_expand_special_args_builtin (const struct builtin_description *d,
25452 tree exp, rtx target)
25453 {
25454 tree arg;
25455 rtx pat, op;
25456 unsigned int i, nargs, arg_adjust, memory;
25457 struct
25458 {
25459 rtx op;
25460 enum machine_mode mode;
25461 } args[3];
25462 enum insn_code icode = d->icode;
25463 bool last_arg_constant = false;
25464 const struct insn_data_d *insn_p = &insn_data[icode];
25465 enum machine_mode tmode = insn_p->operand[0].mode;
25466 enum { load, store } klass;
25467
25468 switch ((enum ix86_builtin_func_type) d->flag)
25469 {
25470 case VOID_FTYPE_VOID:
25471 emit_insn (GEN_FCN (icode) (target));
25472 return 0;
25473 case VOID_FTYPE_UINT64:
25474 case VOID_FTYPE_UNSIGNED:
25475 nargs = 0;
25476 klass = store;
25477 memory = 0;
25478 break;
25480 case UINT64_FTYPE_VOID:
25481 case UNSIGNED_FTYPE_VOID:
25482 case UINT16_FTYPE_VOID:
25483 nargs = 0;
25484 klass = load;
25485 memory = 0;
25486 break;
25487 case UINT64_FTYPE_PUNSIGNED:
25488 case V2DI_FTYPE_PV2DI:
25489 case V32QI_FTYPE_PCCHAR:
25490 case V16QI_FTYPE_PCCHAR:
25491 case V8SF_FTYPE_PCV4SF:
25492 case V8SF_FTYPE_PCFLOAT:
25493 case V4SF_FTYPE_PCFLOAT:
25494 case V4DF_FTYPE_PCV2DF:
25495 case V4DF_FTYPE_PCDOUBLE:
25496 case V2DF_FTYPE_PCDOUBLE:
25497 case VOID_FTYPE_PVOID:
25498 nargs = 1;
25499 klass = load;
25500 memory = 0;
25501 break;
25502 case VOID_FTYPE_PV2SF_V4SF:
25503 case VOID_FTYPE_PV4DI_V4DI:
25504 case VOID_FTYPE_PV2DI_V2DI:
25505 case VOID_FTYPE_PCHAR_V32QI:
25506 case VOID_FTYPE_PCHAR_V16QI:
25507 case VOID_FTYPE_PFLOAT_V8SF:
25508 case VOID_FTYPE_PFLOAT_V4SF:
25509 case VOID_FTYPE_PDOUBLE_V4DF:
25510 case VOID_FTYPE_PDOUBLE_V2DF:
25511 case VOID_FTYPE_PULONGLONG_ULONGLONG:
25512 case VOID_FTYPE_PINT_INT:
25513 nargs = 1;
25514 klass = store;
25515 /* Reserve memory operand for target. */
25516 memory = ARRAY_SIZE (args);
25517 break;
25518 case V4SF_FTYPE_V4SF_PCV2SF:
25519 case V2DF_FTYPE_V2DF_PCDOUBLE:
25520 nargs = 2;
25521 klass = load;
25522 memory = 1;
25523 break;
25524 case V8SF_FTYPE_PCV8SF_V8SF:
25525 case V4DF_FTYPE_PCV4DF_V4DF:
25526 case V4SF_FTYPE_PCV4SF_V4SF:
25527 case V2DF_FTYPE_PCV2DF_V2DF:
25528 nargs = 2;
25529 klass = load;
25530 memory = 0;
25531 break;
25532 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25533 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25534 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25535 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25536 nargs = 2;
25537 klass = store;
25538 /* Reserve memory operand for target. */
25539 memory = ARRAY_SIZE (args);
25540 break;
25541 case VOID_FTYPE_UINT_UINT_UINT:
25542 case VOID_FTYPE_UINT64_UINT_UINT:
25543 case UCHAR_FTYPE_UINT_UINT_UINT:
25544 case UCHAR_FTYPE_UINT64_UINT_UINT:
25545 nargs = 3;
25546 klass = load;
25547 memory = ARRAY_SIZE (args);
25548 last_arg_constant = true;
25549 break;
25550 default:
25551 gcc_unreachable ();
25552 }
25553
25554 gcc_assert (nargs <= ARRAY_SIZE (args));
25555
25556 if (klass == store)
25557 {
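      /* For stores the first call argument supplies the insn's first
         operand: it is wrapped in a MEM for the memory forms, or forced
         into a register otherwise.  The remaining arguments are shifted
         by one.  */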
25558 arg = CALL_EXPR_ARG (exp, 0);
25559 op = expand_normal (arg);
25560 gcc_assert (target == 0);
25561 if (memory)
25562 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25563 else
25564 target = force_reg (tmode, op);
25565 arg_adjust = 1;
25566 }
25567 else
25568 {
25569 arg_adjust = 0;
25570 if (optimize
25571 || target == 0
25572 || GET_MODE (target) != tmode
25573 || !insn_p->operand[0].predicate (target, tmode))
25574 target = gen_reg_rtx (tmode);
25575 }
25576
25577 for (i = 0; i < nargs; i++)
25578 {
25579 enum machine_mode mode = insn_p->operand[i + 1].mode;
25580 bool match;
25581
25582 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25583 op = expand_normal (arg);
25584 match = insn_p->operand[i + 1].predicate (op, mode);
25585
25586 if (last_arg_constant && (i + 1) == nargs)
25587 {
25588 if (!match)
25589 {
25590 if (icode == CODE_FOR_lwp_lwpvalsi3
25591 || icode == CODE_FOR_lwp_lwpinssi3
25592 || icode == CODE_FOR_lwp_lwpvaldi3
25593 || icode == CODE_FOR_lwp_lwpinsdi3)
25594 error ("the last argument must be a 32-bit immediate");
25595 else
25596 error ("the last argument must be an 8-bit immediate");
25597 return const0_rtx;
25598 }
25599 }
25600 else
25601 {
25602 if (i == memory)
25603 {
25604 /* This must be the memory operand. */
25605 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25606 gcc_assert (GET_MODE (op) == mode
25607 || GET_MODE (op) == VOIDmode);
25608 }
25609 else
25610 {
25611 	      /* This must be a register.  */
25612 if (VECTOR_MODE_P (mode))
25613 op = safe_vector_operand (op, mode);
25614
25615 gcc_assert (GET_MODE (op) == mode
25616 || GET_MODE (op) == VOIDmode);
25617 op = copy_to_mode_reg (mode, op);
25618 }
25619 }
25620
25621 args[i].op = op;
25622 args[i].mode = mode;
25623 }
25624
25625 switch (nargs)
25626 {
25627 case 0:
25628 pat = GEN_FCN (icode) (target);
25629 break;
25630 case 1:
25631 pat = GEN_FCN (icode) (target, args[0].op);
25632 break;
25633 case 2:
25634 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25635 break;
25636 case 3:
25637 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25638 break;
25639 default:
25640 gcc_unreachable ();
25641 }
25642
25643 if (! pat)
25644 return 0;
25645 emit_insn (pat);
25646 return klass == store ? 0 : target;
25647 }
25648
25649 /* Return the integer constant in ARG. Constrain it to be in the range
25650 of the subparts of VEC_TYPE; issue an error if not. */
25651
25652 static int
25653 get_element_number (tree vec_type, tree arg)
25654 {
25655 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25656
25657 if (!host_integerp (arg, 1)
25658 || (elt = tree_low_cst (arg, 1), elt > max))
25659 {
25660 error ("selector must be an integer constant in the range 0..%wi", max);
25661 return 0;
25662 }
25663
25664 return elt;
25665 }
25666
25667 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25668 ix86_expand_vector_init. We DO have language-level syntax for this, in
25669 the form of (type){ init-list }. Except that since we can't place emms
25670 instructions from inside the compiler, we can't allow the use of MMX
25671 registers unless the user explicitly asks for it. So we do *not* define
25672 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25673    we have builtins invoked by mmintrin.h that give us license to emit
25674 these sorts of instructions. */
25675
25676 static rtx
25677 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25678 {
25679 enum machine_mode tmode = TYPE_MODE (type);
25680 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25681 int i, n_elt = GET_MODE_NUNITS (tmode);
25682 rtvec v = rtvec_alloc (n_elt);
25683
25684 gcc_assert (VECTOR_MODE_P (tmode));
25685 gcc_assert (call_expr_nargs (exp) == n_elt);
25686
25687 for (i = 0; i < n_elt; ++i)
25688 {
25689 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25690 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25691 }
25692
25693 if (!target || !register_operand (target, tmode))
25694 target = gen_reg_rtx (tmode);
25695
25696 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25697 return target;
25698 }
25699
25700 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25701 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25702 had a language-level syntax for referencing vector elements. */
25703
25704 static rtx
25705 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25706 {
25707 enum machine_mode tmode, mode0;
25708 tree arg0, arg1;
25709 int elt;
25710 rtx op0;
25711
25712 arg0 = CALL_EXPR_ARG (exp, 0);
25713 arg1 = CALL_EXPR_ARG (exp, 1);
25714
25715 op0 = expand_normal (arg0);
25716 elt = get_element_number (TREE_TYPE (arg0), arg1);
25717
25718 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25719 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25720 gcc_assert (VECTOR_MODE_P (mode0));
25721
25722 op0 = force_reg (mode0, op0);
25723
25724 if (optimize || !target || !register_operand (target, tmode))
25725 target = gen_reg_rtx (tmode);
25726
25727 ix86_expand_vector_extract (true, target, op0, elt);
25728
25729 return target;
25730 }
25731
25732 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25733 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25734 a language-level syntax for referencing vector elements. */
25735
25736 static rtx
25737 ix86_expand_vec_set_builtin (tree exp)
25738 {
25739 enum machine_mode tmode, mode1;
25740 tree arg0, arg1, arg2;
25741 int elt;
25742 rtx op0, op1, target;
25743
25744 arg0 = CALL_EXPR_ARG (exp, 0);
25745 arg1 = CALL_EXPR_ARG (exp, 1);
25746 arg2 = CALL_EXPR_ARG (exp, 2);
25747
25748 tmode = TYPE_MODE (TREE_TYPE (arg0));
25749 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25750 gcc_assert (VECTOR_MODE_P (tmode));
25751
25752 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25753 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25754 elt = get_element_number (TREE_TYPE (arg0), arg2);
25755
25756 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25757 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25758
25759 op0 = force_reg (tmode, op0);
25760 op1 = force_reg (mode1, op1);
25761
25762 /* OP0 is the source of these builtin functions and shouldn't be
25763 modified. Create a copy, use it and return it as target. */
25764 target = gen_reg_rtx (tmode);
25765 emit_move_insn (target, op0);
25766 ix86_expand_vector_set (true, target, op1, elt);
25767
25768 return target;
25769 }
25770
25771 /* Expand an expression EXP that calls a built-in function,
25772 with result going to TARGET if that's convenient
25773 (and in mode MODE if that's convenient).
25774 SUBTARGET may be used as the target for computing one of EXP's operands.
25775 IGNORE is nonzero if the value is to be ignored. */
25776
25777 static rtx
25778 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25779 enum machine_mode mode ATTRIBUTE_UNUSED,
25780 int ignore ATTRIBUTE_UNUSED)
25781 {
25782 const struct builtin_description *d;
25783 size_t i;
25784 enum insn_code icode;
25785 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25786 tree arg0, arg1, arg2;
25787 rtx op0, op1, op2, pat;
25788 enum machine_mode mode0, mode1, mode2;
25789 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25790
25791 /* Determine whether the builtin function is available under the current ISA.
25792 Originally the builtin was not created if it wasn't applicable to the
25793 current ISA based on the command line switches. With function specific
25794 options, we need to check in the context of the function making the call
25795 whether it is supported. */
25796 if (ix86_builtins_isa[fcode].isa
25797 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25798 {
25799 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25800 NULL, NULL, false);
25801
25802 if (!opts)
25803 error ("%qE needs unknown isa option", fndecl);
25804 else
25805 {
25806 gcc_assert (opts != NULL);
25807 error ("%qE needs isa option %s", fndecl, opts);
25808 free (opts);
25809 }
25810 return const0_rtx;
25811 }
25812
25813 switch (fcode)
25814 {
25815 case IX86_BUILTIN_MASKMOVQ:
25816 case IX86_BUILTIN_MASKMOVDQU:
25817 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25818 ? CODE_FOR_mmx_maskmovq
25819 : CODE_FOR_sse2_maskmovdqu);
25820 /* Note the arg order is different from the operand order. */
25821 arg1 = CALL_EXPR_ARG (exp, 0);
25822 arg2 = CALL_EXPR_ARG (exp, 1);
25823 arg0 = CALL_EXPR_ARG (exp, 2);
25824 op0 = expand_normal (arg0);
25825 op1 = expand_normal (arg1);
25826 op2 = expand_normal (arg2);
25827 mode0 = insn_data[icode].operand[0].mode;
25828 mode1 = insn_data[icode].operand[1].mode;
25829 mode2 = insn_data[icode].operand[2].mode;
25830
25831 op0 = force_reg (Pmode, op0);
25832 op0 = gen_rtx_MEM (mode1, op0);
25833
25834 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25835 op0 = copy_to_mode_reg (mode0, op0);
25836 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25837 op1 = copy_to_mode_reg (mode1, op1);
25838 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25839 op2 = copy_to_mode_reg (mode2, op2);
25840 pat = GEN_FCN (icode) (op0, op1, op2);
25841 if (! pat)
25842 return 0;
25843 emit_insn (pat);
25844 return 0;
25845
25846 case IX86_BUILTIN_LDMXCSR:
25847 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25848 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25849 emit_move_insn (target, op0);
25850 emit_insn (gen_sse_ldmxcsr (target));
25851 return 0;
25852
25853 case IX86_BUILTIN_STMXCSR:
25854 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25855 emit_insn (gen_sse_stmxcsr (target));
25856 return copy_to_mode_reg (SImode, target);
25857
25858 case IX86_BUILTIN_CLFLUSH:
25859 arg0 = CALL_EXPR_ARG (exp, 0);
25860 op0 = expand_normal (arg0);
25861 icode = CODE_FOR_sse2_clflush;
25862 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25863 op0 = copy_to_mode_reg (Pmode, op0);
25864
25865 emit_insn (gen_sse2_clflush (op0));
25866 return 0;
25867
25868 case IX86_BUILTIN_MONITOR:
25869 arg0 = CALL_EXPR_ARG (exp, 0);
25870 arg1 = CALL_EXPR_ARG (exp, 1);
25871 arg2 = CALL_EXPR_ARG (exp, 2);
25872 op0 = expand_normal (arg0);
25873 op1 = expand_normal (arg1);
25874 op2 = expand_normal (arg2);
25875 if (!REG_P (op0))
25876 op0 = copy_to_mode_reg (Pmode, op0);
25877 if (!REG_P (op1))
25878 op1 = copy_to_mode_reg (SImode, op1);
25879 if (!REG_P (op2))
25880 op2 = copy_to_mode_reg (SImode, op2);
25881 emit_insn (ix86_gen_monitor (op0, op1, op2));
25882 return 0;
25883
25884 case IX86_BUILTIN_MWAIT:
25885 arg0 = CALL_EXPR_ARG (exp, 0);
25886 arg1 = CALL_EXPR_ARG (exp, 1);
25887 op0 = expand_normal (arg0);
25888 op1 = expand_normal (arg1);
25889 if (!REG_P (op0))
25890 op0 = copy_to_mode_reg (SImode, op0);
25891 if (!REG_P (op1))
25892 op1 = copy_to_mode_reg (SImode, op1);
25893 emit_insn (gen_sse3_mwait (op0, op1));
25894 return 0;
25895
25896 case IX86_BUILTIN_VEC_INIT_V2SI:
25897 case IX86_BUILTIN_VEC_INIT_V4HI:
25898 case IX86_BUILTIN_VEC_INIT_V8QI:
25899 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25900
25901 case IX86_BUILTIN_VEC_EXT_V2DF:
25902 case IX86_BUILTIN_VEC_EXT_V2DI:
25903 case IX86_BUILTIN_VEC_EXT_V4SF:
25904 case IX86_BUILTIN_VEC_EXT_V4SI:
25905 case IX86_BUILTIN_VEC_EXT_V8HI:
25906 case IX86_BUILTIN_VEC_EXT_V2SI:
25907 case IX86_BUILTIN_VEC_EXT_V4HI:
25908 case IX86_BUILTIN_VEC_EXT_V16QI:
25909 return ix86_expand_vec_ext_builtin (exp, target);
25910
25911 case IX86_BUILTIN_VEC_SET_V2DI:
25912 case IX86_BUILTIN_VEC_SET_V4SF:
25913 case IX86_BUILTIN_VEC_SET_V4SI:
25914 case IX86_BUILTIN_VEC_SET_V8HI:
25915 case IX86_BUILTIN_VEC_SET_V4HI:
25916 case IX86_BUILTIN_VEC_SET_V16QI:
25917 return ix86_expand_vec_set_builtin (exp);
25918
25919 case IX86_BUILTIN_VEC_PERM_V2DF:
25920 case IX86_BUILTIN_VEC_PERM_V4SF:
25921 case IX86_BUILTIN_VEC_PERM_V2DI:
25922 case IX86_BUILTIN_VEC_PERM_V4SI:
25923 case IX86_BUILTIN_VEC_PERM_V8HI:
25924 case IX86_BUILTIN_VEC_PERM_V16QI:
25925 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25926 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25927 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25928 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25929 case IX86_BUILTIN_VEC_PERM_V4DF:
25930 case IX86_BUILTIN_VEC_PERM_V8SF:
25931 return ix86_expand_vec_perm_builtin (exp);
25932
25933 case IX86_BUILTIN_INFQ:
25934 case IX86_BUILTIN_HUGE_VALQ:
25935 {
25936 REAL_VALUE_TYPE inf;
25937 rtx tmp;
25938
25939 real_inf (&inf);
25940 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25941
25942 tmp = validize_mem (force_const_mem (mode, tmp));
25943
25944 if (target == 0)
25945 target = gen_reg_rtx (mode);
25946
25947 emit_move_insn (target, tmp);
25948 return target;
25949 }
25950
25951 case IX86_BUILTIN_LLWPCB:
25952 arg0 = CALL_EXPR_ARG (exp, 0);
25953 op0 = expand_normal (arg0);
25954 icode = CODE_FOR_lwp_llwpcb;
25955 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25956 op0 = copy_to_mode_reg (Pmode, op0);
25957 emit_insn (gen_lwp_llwpcb (op0));
25958 return 0;
25959
25960 case IX86_BUILTIN_SLWPCB:
25961 icode = CODE_FOR_lwp_slwpcb;
25962 if (!target
25963 || !insn_data[icode].operand[0].predicate (target, Pmode))
25964 target = gen_reg_rtx (Pmode);
25965 emit_insn (gen_lwp_slwpcb (target));
25966 return target;
25967
25968 default:
25969 break;
25970 }
25971
25972 for (i = 0, d = bdesc_special_args;
25973 i < ARRAY_SIZE (bdesc_special_args);
25974 i++, d++)
25975 if (d->code == fcode)
25976 return ix86_expand_special_args_builtin (d, exp, target);
25977
25978 for (i = 0, d = bdesc_args;
25979 i < ARRAY_SIZE (bdesc_args);
25980 i++, d++)
25981 if (d->code == fcode)
25982 switch (fcode)
25983 {
25984 case IX86_BUILTIN_FABSQ:
25985 case IX86_BUILTIN_COPYSIGNQ:
25986 if (!TARGET_SSE2)
25987 /* Emit a normal call if SSE2 isn't available. */
25988 return expand_call (exp, target, ignore);
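	  /* FALLTHRU */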
25989 default:
25990 return ix86_expand_args_builtin (d, exp, target);
25991 }
25992
25993 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25994 if (d->code == fcode)
25995 return ix86_expand_sse_comi (d, exp, target);
25996
25997 for (i = 0, d = bdesc_pcmpestr;
25998 i < ARRAY_SIZE (bdesc_pcmpestr);
25999 i++, d++)
26000 if (d->code == fcode)
26001 return ix86_expand_sse_pcmpestr (d, exp, target);
26002
26003 for (i = 0, d = bdesc_pcmpistr;
26004 i < ARRAY_SIZE (bdesc_pcmpistr);
26005 i++, d++)
26006 if (d->code == fcode)
26007 return ix86_expand_sse_pcmpistr (d, exp, target);
26008
26009 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
26010 if (d->code == fcode)
26011 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
26012 (enum ix86_builtin_func_type)
26013 d->flag, d->comparison);
26014
26015 gcc_unreachable ();
26016 }
26017
26018 /* Returns a function decl for a vectorized version of the builtin function
26019    FNDECL, with result vector type TYPE_OUT and argument vector type TYPE_IN,
26020    or NULL_TREE if it is not available.  */
26021
26022 static tree
26023 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
26024 tree type_in)
26025 {
26026 enum machine_mode in_mode, out_mode;
26027 int in_n, out_n;
26028 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
26029
26030 if (TREE_CODE (type_out) != VECTOR_TYPE
26031 || TREE_CODE (type_in) != VECTOR_TYPE
26032 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
26033 return NULL_TREE;
26034
26035 out_mode = TYPE_MODE (TREE_TYPE (type_out));
26036 out_n = TYPE_VECTOR_SUBPARTS (type_out);
26037 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26038 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26039
26040 switch (fn)
26041 {
26042 case BUILT_IN_SQRT:
26043 if (out_mode == DFmode && out_n == 2
26044 && in_mode == DFmode && in_n == 2)
26045 return ix86_builtins[IX86_BUILTIN_SQRTPD];
26046 break;
26047
26048 case BUILT_IN_SQRTF:
26049 if (out_mode == SFmode && out_n == 4
26050 && in_mode == SFmode && in_n == 4)
26051 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
26052 break;
26053
26054 case BUILT_IN_LRINT:
26055 if (out_mode == SImode && out_n == 4
26056 && in_mode == DFmode && in_n == 2)
26057 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
26058 break;
26059
26060 case BUILT_IN_LRINTF:
26061 if (out_mode == SImode && out_n == 4
26062 && in_mode == SFmode && in_n == 4)
26063 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
26064 break;
26065
26066 case BUILT_IN_COPYSIGN:
26067 if (out_mode == DFmode && out_n == 2
26068 && in_mode == DFmode && in_n == 2)
26069 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
26070 break;
26071
26072 case BUILT_IN_COPYSIGNF:
26073 if (out_mode == SFmode && out_n == 4
26074 && in_mode == SFmode && in_n == 4)
26075 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
26076 break;
26077
26078 default:
26079 ;
26080 }
26081
26082 /* Dispatch to a handler for a vectorization library. */
26083 if (ix86_veclib_handler)
26084 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
26085 type_in);
26086
26087 return NULL_TREE;
26088 }
26089
26090 /* Handler for an SVML-style interface to
26091 a library with vectorized intrinsics. */
26092
26093 static tree
26094 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
26095 {
26096 char name[20];
26097 tree fntype, new_fndecl, args;
26098 unsigned arity;
26099 const char *bname;
26100 enum machine_mode el_mode, in_mode;
26101 int n, in_n;
26102
26103 /* The SVML is suitable for unsafe math only. */
26104 if (!flag_unsafe_math_optimizations)
26105 return NULL_TREE;
26106
26107 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26108 n = TYPE_VECTOR_SUBPARTS (type_out);
26109 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26110 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26111 if (el_mode != in_mode
26112 || n != in_n)
26113 return NULL_TREE;
26114
26115 switch (fn)
26116 {
26117 case BUILT_IN_EXP:
26118 case BUILT_IN_LOG:
26119 case BUILT_IN_LOG10:
26120 case BUILT_IN_POW:
26121 case BUILT_IN_TANH:
26122 case BUILT_IN_TAN:
26123 case BUILT_IN_ATAN:
26124 case BUILT_IN_ATAN2:
26125 case BUILT_IN_ATANH:
26126 case BUILT_IN_CBRT:
26127 case BUILT_IN_SINH:
26128 case BUILT_IN_SIN:
26129 case BUILT_IN_ASINH:
26130 case BUILT_IN_ASIN:
26131 case BUILT_IN_COSH:
26132 case BUILT_IN_COS:
26133 case BUILT_IN_ACOSH:
26134 case BUILT_IN_ACOS:
26135 if (el_mode != DFmode || n != 2)
26136 return NULL_TREE;
26137 break;
26138
26139 case BUILT_IN_EXPF:
26140 case BUILT_IN_LOGF:
26141 case BUILT_IN_LOG10F:
26142 case BUILT_IN_POWF:
26143 case BUILT_IN_TANHF:
26144 case BUILT_IN_TANF:
26145 case BUILT_IN_ATANF:
26146 case BUILT_IN_ATAN2F:
26147 case BUILT_IN_ATANHF:
26148 case BUILT_IN_CBRTF:
26149 case BUILT_IN_SINHF:
26150 case BUILT_IN_SINF:
26151 case BUILT_IN_ASINHF:
26152 case BUILT_IN_ASINF:
26153 case BUILT_IN_COSHF:
26154 case BUILT_IN_COSF:
26155 case BUILT_IN_ACOSHF:
26156 case BUILT_IN_ACOSF:
26157 if (el_mode != SFmode || n != 4)
26158 return NULL_TREE;
26159 break;
26160
26161 default:
26162 return NULL_TREE;
26163 }
26164
26165 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26166
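  /* Mangle the name into the SVML form "vmls<Func>4" (SFmode) or
     "vmld<Func>2" (DFmode), with the first letter of <Func> capitalized;
     bname + 10 skips the "__builtin_" prefix.  */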
26167 if (fn == BUILT_IN_LOGF)
26168 strcpy (name, "vmlsLn4");
26169 else if (fn == BUILT_IN_LOG)
26170 strcpy (name, "vmldLn2");
26171 else if (n == 4)
26172 {
26173 sprintf (name, "vmls%s", bname+10);
26174 name[strlen (name)-1] = '4';
26175 }
26176 else
26177 sprintf (name, "vmld%s2", bname+10);
26178
26179 /* Convert to uppercase. */
26180 name[4] &= ~0x20;
26181
26182 arity = 0;
26183 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26184 args = TREE_CHAIN (args))
26185 arity++;
26186
26187 if (arity == 1)
26188 fntype = build_function_type_list (type_out, type_in, NULL);
26189 else
26190 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26191
26192 /* Build a function declaration for the vectorized function. */
26193 new_fndecl = build_decl (BUILTINS_LOCATION,
26194 FUNCTION_DECL, get_identifier (name), fntype);
26195 TREE_PUBLIC (new_fndecl) = 1;
26196 DECL_EXTERNAL (new_fndecl) = 1;
26197 DECL_IS_NOVOPS (new_fndecl) = 1;
26198 TREE_READONLY (new_fndecl) = 1;
26199
26200 return new_fndecl;
26201 }
26202
26203 /* Handler for an ACML-style interface to
26204 a library with vectorized intrinsics. */
26205
26206 static tree
26207 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
26208 {
26209 char name[20] = "__vr.._";
26210 tree fntype, new_fndecl, args;
26211 unsigned arity;
26212 const char *bname;
26213 enum machine_mode el_mode, in_mode;
26214 int n, in_n;
26215
26216   /* The ACML is 64-bit only and suitable for unsafe math only, as it
26217      does not correctly support parts of IEEE with the required
26218      precision, such as denormals.  */
26219 if (!TARGET_64BIT
26220 || !flag_unsafe_math_optimizations)
26221 return NULL_TREE;
26222
26223 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26224 n = TYPE_VECTOR_SUBPARTS (type_out);
26225 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26226 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26227 if (el_mode != in_mode
26228 || n != in_n)
26229 return NULL_TREE;
26230
26231 switch (fn)
26232 {
26233 case BUILT_IN_SIN:
26234 case BUILT_IN_COS:
26235 case BUILT_IN_EXP:
26236 case BUILT_IN_LOG:
26237 case BUILT_IN_LOG2:
26238 case BUILT_IN_LOG10:
26239 name[4] = 'd';
26240 name[5] = '2';
26241 if (el_mode != DFmode
26242 || n != 2)
26243 return NULL_TREE;
26244 break;
26245
26246 case BUILT_IN_SINF:
26247 case BUILT_IN_COSF:
26248 case BUILT_IN_EXPF:
26249 case BUILT_IN_POWF:
26250 case BUILT_IN_LOGF:
26251 case BUILT_IN_LOG2F:
26252 case BUILT_IN_LOG10F:
26253 name[4] = 's';
26254 name[5] = '4';
26255 if (el_mode != SFmode
26256 || n != 4)
26257 return NULL_TREE;
26258 break;
26259
26260 default:
26261 return NULL_TREE;
26262 }
26263
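  /* Complete the "__vr<d2|s4>_" prefix with the math function's name;
     bname + 10 skips the "__builtin_" prefix, giving e.g. "__vrd2_sin".  */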
26264 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26265 sprintf (name + 7, "%s", bname+10);
26266
26267 arity = 0;
26268 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26269 args = TREE_CHAIN (args))
26270 arity++;
26271
26272 if (arity == 1)
26273 fntype = build_function_type_list (type_out, type_in, NULL);
26274 else
26275 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26276
26277 /* Build a function declaration for the vectorized function. */
26278 new_fndecl = build_decl (BUILTINS_LOCATION,
26279 FUNCTION_DECL, get_identifier (name), fntype);
26280 TREE_PUBLIC (new_fndecl) = 1;
26281 DECL_EXTERNAL (new_fndecl) = 1;
26282 DECL_IS_NOVOPS (new_fndecl) = 1;
26283 TREE_READONLY (new_fndecl) = 1;
26284
26285 return new_fndecl;
26286 }
26287
26288
26289 /* Returns a decl of a function that implements conversion of an integer vector
26290 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
26291 are the types involved when converting according to CODE.
26292 Return NULL_TREE if it is not available. */
26293
26294 static tree
26295 ix86_vectorize_builtin_conversion (unsigned int code,
26296 tree dest_type, tree src_type)
26297 {
26298 if (! TARGET_SSE2)
26299 return NULL_TREE;
26300
26301 switch (code)
26302 {
26303 case FLOAT_EXPR:
26304 switch (TYPE_MODE (src_type))
26305 {
26306 case V4SImode:
26307 switch (TYPE_MODE (dest_type))
26308 {
26309 case V4SFmode:
26310 return (TYPE_UNSIGNED (src_type)
26311 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
26312 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
26313 case V4DFmode:
26314 return (TYPE_UNSIGNED (src_type)
26315 ? NULL_TREE
26316 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
26317 default:
26318 return NULL_TREE;
26319 }
26320 break;
26321 case V8SImode:
26322 switch (TYPE_MODE (dest_type))
26323 {
26324 case V8SFmode:
26325 return (TYPE_UNSIGNED (src_type)
26326 ? NULL_TREE
26327 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
26328 default:
26329 return NULL_TREE;
26330 }
26331 break;
26332 default:
26333 return NULL_TREE;
26334 }
26335
26336 case FIX_TRUNC_EXPR:
26337 switch (TYPE_MODE (dest_type))
26338 {
26339 case V4SImode:
26340 switch (TYPE_MODE (src_type))
26341 {
26342 case V4SFmode:
26343 return (TYPE_UNSIGNED (dest_type)
26344 ? NULL_TREE
26345 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
26346 case V4DFmode:
26347 return (TYPE_UNSIGNED (dest_type)
26348 ? NULL_TREE
26349 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
26350 default:
26351 return NULL_TREE;
26352 }
26353 break;
26354
26355 case V8SImode:
26356 switch (TYPE_MODE (src_type))
26357 {
26358 case V8SFmode:
26359 return (TYPE_UNSIGNED (dest_type)
26360 ? NULL_TREE
26361 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
26362 default:
26363 return NULL_TREE;
26364 }
26365 break;
26366
26367 default:
26368 return NULL_TREE;
26369 }
26370
26371 default:
26372 return NULL_TREE;
26373 }
26374
26375 return NULL_TREE;
26376 }
26377
26378 /* Returns the decl of a target-specific builtin that implements the
26379    reciprocal of the function FN, or NULL_TREE if not available.  */
26380
26381 static tree
26382 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
26383 bool sqrt ATTRIBUTE_UNUSED)
26384 {
26385 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
26386 && flag_finite_math_only && !flag_trapping_math
26387 && flag_unsafe_math_optimizations))
26388 return NULL_TREE;
26389
26390 if (md_fn)
26391 /* Machine dependent builtins. */
26392 switch (fn)
26393 {
26394 /* Vectorized version of sqrt to rsqrt conversion. */
26395 case IX86_BUILTIN_SQRTPS_NR:
26396 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
26397
26398 default:
26399 return NULL_TREE;
26400 }
26401 else
26402 /* Normal builtins. */
26403 switch (fn)
26404 {
26405 /* Sqrt to rsqrt conversion. */
26406 case BUILT_IN_SQRTF:
26407 return ix86_builtins[IX86_BUILTIN_RSQRTF];
26408
26409 default:
26410 return NULL_TREE;
26411 }
26412 }
26413 \f
26414 /* Helper for avx_vpermilps256_operand et al. This is also used by
26415 the expansion functions to turn the parallel back into a mask.
26416 The return value is 0 for no match and the imm8+1 for a match. */
26417
26418 int
26419 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
26420 {
26421 unsigned i, nelt = GET_MODE_NUNITS (mode);
26422 unsigned mask = 0;
26423 unsigned char ipar[8];
26424
26425 if (XVECLEN (par, 0) != (int) nelt)
26426 return 0;
26427
26428 /* Validate that all of the elements are constants, and not totally
26429 out of range. Copy the data into an integral array to make the
26430 subsequent checks easier. */
26431 for (i = 0; i < nelt; ++i)
26432 {
26433 rtx er = XVECEXP (par, 0, i);
26434 unsigned HOST_WIDE_INT ei;
26435
26436 if (!CONST_INT_P (er))
26437 return 0;
26438 ei = INTVAL (er);
26439 if (ei >= nelt)
26440 return 0;
26441 ipar[i] = ei;
26442 }
26443
26444 switch (mode)
26445 {
26446 case V4DFmode:
26447 /* In the 256-bit DFmode case, we can only move elements within
26448 a 128-bit lane. */
26449 for (i = 0; i < 2; ++i)
26450 {
26451 if (ipar[i] >= 2)
26452 return 0;
26453 mask |= ipar[i] << i;
26454 }
26455 for (i = 2; i < 4; ++i)
26456 {
26457 if (ipar[i] < 2)
26458 return 0;
26459 mask |= (ipar[i] - 2) << i;
26460 }
26461 break;
26462
26463 case V8SFmode:
26464 /* In the 256-bit SFmode case, we have full freedom of movement
26465 within the low 128-bit lane, but the high 128-bit lane must
26466 mirror the exact same pattern. */
26467 for (i = 0; i < 4; ++i)
26468 if (ipar[i] + 4 != ipar[i + 4])
26469 return 0;
26470 nelt = 4;
26471 /* FALLTHRU */
26472
26473 case V2DFmode:
26474 case V4SFmode:
26475       /* In the 128-bit case, we have full freedom in the placement of
26476 	 the elements from the source operand.  */
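      /* Each selector needs log2(nelt) bits, which for the element counts
	 handled here (2 or 4) happens to equal nelt / 2 - hence the shift
	 below.  */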
26477 for (i = 0; i < nelt; ++i)
26478 mask |= ipar[i] << (i * (nelt / 2));
26479 break;
26480
26481 default:
26482 gcc_unreachable ();
26483 }
26484
26485 /* Make sure success has a non-zero value by adding one. */
26486 return mask + 1;
26487 }
26488
26489 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
26490 the expansion functions to turn the parallel back into a mask.
26491 The return value is 0 for no match and the imm8+1 for a match. */
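/* For example, for V8SFmode the parallel [0 1 2 3 12 13 14 15] selects
   the low 128-bit lane of the first operand and the high lane of the
   second operand; each destination lane is described by a 4-bit field,
   giving imm8 = 0x30, so this function returns 0x31.  */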
26492
26493 int
26494 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
26495 {
26496 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
26497 unsigned mask = 0;
26498 unsigned char ipar[8];
26499
26500 if (XVECLEN (par, 0) != (int) nelt)
26501 return 0;
26502
26503 /* Validate that all of the elements are constants, and not totally
26504 out of range. Copy the data into an integral array to make the
26505 subsequent checks easier. */
26506 for (i = 0; i < nelt; ++i)
26507 {
26508 rtx er = XVECEXP (par, 0, i);
26509 unsigned HOST_WIDE_INT ei;
26510
26511 if (!CONST_INT_P (er))
26512 return 0;
26513 ei = INTVAL (er);
26514 if (ei >= 2 * nelt)
26515 return 0;
26516 ipar[i] = ei;
26517 }
26518
26519   /* Validate that each half of the permute selects consecutive elements.  */
26520 for (i = 0; i < nelt2 - 1; ++i)
26521 if (ipar[i] + 1 != ipar[i + 1])
26522 return 0;
26523 for (i = nelt2; i < nelt - 1; ++i)
26524 if (ipar[i] + 1 != ipar[i + 1])
26525 return 0;
26526
26527 /* Reconstruct the mask. */
26528 for (i = 0; i < 2; ++i)
26529 {
26530 unsigned e = ipar[i * nelt2];
26531 if (e % nelt2)
26532 return 0;
26533 e /= nelt2;
26534 mask |= e << (i * 4);
26535 }
26536
26537 /* Make sure success has a non-zero value by adding one. */
26538 return mask + 1;
26539 }
26540 \f
26541
26542 /* Store OPERAND to memory after reload is completed.  This means
26543    that we can't easily use assign_stack_local.  */
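/* The value is stored just below the stack pointer: within the red zone
   when one is available (no stack adjustment needed), otherwise via a
   push.  Callers release the slot again with ix86_free_from_memory,
   which is a no-op in the red-zone case.  */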
26544 rtx
26545 ix86_force_to_memory (enum machine_mode mode, rtx operand)
26546 {
26547 rtx result;
26548
26549 gcc_assert (reload_completed);
26550 if (ix86_using_red_zone ())
26551 {
26552 result = gen_rtx_MEM (mode,
26553 gen_rtx_PLUS (Pmode,
26554 stack_pointer_rtx,
26555 GEN_INT (-RED_ZONE_SIZE)));
26556 emit_move_insn (result, operand);
26557 }
26558 else if (TARGET_64BIT)
26559 {
26560 switch (mode)
26561 {
26562 case HImode:
26563 case SImode:
26564 operand = gen_lowpart (DImode, operand);
26565 /* FALLTHRU */
26566 case DImode:
26567 emit_insn (
26568 gen_rtx_SET (VOIDmode,
26569 gen_rtx_MEM (DImode,
26570 gen_rtx_PRE_DEC (DImode,
26571 stack_pointer_rtx)),
26572 operand));
26573 break;
26574 default:
26575 gcc_unreachable ();
26576 }
26577 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26578 }
26579 else
26580 {
26581 switch (mode)
26582 {
26583 case DImode:
26584 {
26585 rtx operands[2];
26586 split_double_mode (mode, &operand, 1, operands, operands + 1);
26587 emit_insn (
26588 gen_rtx_SET (VOIDmode,
26589 gen_rtx_MEM (SImode,
26590 gen_rtx_PRE_DEC (Pmode,
26591 stack_pointer_rtx)),
26592 operands[1]));
26593 emit_insn (
26594 gen_rtx_SET (VOIDmode,
26595 gen_rtx_MEM (SImode,
26596 gen_rtx_PRE_DEC (Pmode,
26597 stack_pointer_rtx)),
26598 operands[0]));
26599 }
26600 break;
26601 case HImode:
26602 /* Store HImodes as SImodes. */
26603 operand = gen_lowpart (SImode, operand);
26604 /* FALLTHRU */
26605 case SImode:
26606 emit_insn (
26607 gen_rtx_SET (VOIDmode,
26608 gen_rtx_MEM (GET_MODE (operand),
26609 gen_rtx_PRE_DEC (SImode,
26610 stack_pointer_rtx)),
26611 operand));
26612 break;
26613 default:
26614 gcc_unreachable ();
26615 }
26616 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26617 }
26618 return result;
26619 }
26620
26621 /* Free the operand from memory.  */
26622 void
26623 ix86_free_from_memory (enum machine_mode mode)
26624 {
26625 if (!ix86_using_red_zone ())
26626 {
26627 int size;
26628
26629 if (mode == DImode || TARGET_64BIT)
26630 size = 8;
26631 else
26632 size = 4;
26633       /* Use LEA to deallocate stack space.  In peephole2 it will be converted
26634 	 to a pop or add instruction if registers are available.  */
26635 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26636 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26637 GEN_INT (size))));
26638 }
26639 }
26640
26641 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
26642 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
26643 same. */
26644 static const reg_class_t *
26645 i386_ira_cover_classes (void)
26646 {
26647 static const reg_class_t sse_fpmath_classes[] = {
26648 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
26649 };
26650 static const reg_class_t no_sse_fpmath_classes[] = {
26651 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
26652 };
26653
26654 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
26655 }
26656
26657 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26658 QImode must go into class Q_REGS.
26659 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26660 movdf to do mem-to-mem moves through integer regs. */
26661 enum reg_class
26662 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26663 {
26664 enum machine_mode mode = GET_MODE (x);
26665
26666 /* We're only allowed to return a subclass of CLASS. Many of the
26667 following checks fail for NO_REGS, so eliminate that early. */
26668 if (regclass == NO_REGS)
26669 return NO_REGS;
26670
26671 /* All classes can load zeros. */
26672 if (x == CONST0_RTX (mode))
26673 return regclass;
26674
26675 /* Force constants into memory if we are loading a (nonzero) constant into
26676 an MMX or SSE register. This is because there are no MMX/SSE instructions
26677 to load from a constant. */
26678 if (CONSTANT_P (x)
26679 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26680 return NO_REGS;
26681
26682 /* Prefer SSE regs only, if we can use them for math. */
26683 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26684 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26685
26686 /* Floating-point constants need more complex checks. */
26687 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26688 {
26689 /* General regs can load everything. */
26690 if (reg_class_subset_p (regclass, GENERAL_REGS))
26691 return regclass;
26692
26693 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26694 zero above. We only want to wind up preferring 80387 registers if
26695 we plan on doing computation with them. */
26696 if (TARGET_80387
26697 && standard_80387_constant_p (x))
26698 {
26699 /* Limit class to non-sse. */
26700 if (regclass == FLOAT_SSE_REGS)
26701 return FLOAT_REGS;
26702 if (regclass == FP_TOP_SSE_REGS)
26703 return FP_TOP_REG;
26704 if (regclass == FP_SECOND_SSE_REGS)
26705 return FP_SECOND_REG;
26706 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26707 return regclass;
26708 }
26709
26710 return NO_REGS;
26711 }
26712
26713   /* Generally when we see PLUS here, it's the function invariant
26714      (plus soft-fp const_int), which can only be computed into general
26715      regs.  */
26716 if (GET_CODE (x) == PLUS)
26717 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26718
26719 /* QImode constants are easy to load, but non-constant QImode data
26720 must go into Q_REGS. */
26721 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26722 {
26723 if (reg_class_subset_p (regclass, Q_REGS))
26724 return regclass;
26725 if (reg_class_subset_p (Q_REGS, regclass))
26726 return Q_REGS;
26727 return NO_REGS;
26728 }
26729
26730 return regclass;
26731 }
26732
26733 /* Discourage putting floating-point values in SSE registers unless
26734 SSE math is being used, and likewise for the 387 registers. */
26735 enum reg_class
26736 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26737 {
26738 enum machine_mode mode = GET_MODE (x);
26739
26740 /* Restrict the output reload class to the register bank that we are doing
26741 math on. If we would like not to return a subset of CLASS, reject this
26742 alternative: if reload cannot do this, it will still use its choice. */
26743 mode = GET_MODE (x);
26744 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26745 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26746
26747 if (X87_FLOAT_MODE_P (mode))
26748 {
26749 if (regclass == FP_TOP_SSE_REGS)
26750 return FP_TOP_REG;
26751 else if (regclass == FP_SECOND_SSE_REGS)
26752 return FP_SECOND_REG;
26753 else
26754 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
26755 }
26756
26757 return regclass;
26758 }
26759
26760 static reg_class_t
26761 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
26762 enum machine_mode mode,
26763 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26764 {
26765   /* QImode spills from non-QI registers require an
26766      intermediate register on 32-bit targets.  */
26767 if (!in_p && mode == QImode && !TARGET_64BIT
26768 && (rclass == GENERAL_REGS
26769 || rclass == LEGACY_REGS
26770 || rclass == INDEX_REGS))
26771 {
26772 int regno;
26773
26774 if (REG_P (x))
26775 regno = REGNO (x);
26776 else
26777 regno = -1;
26778
26779 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26780 regno = true_regnum (x);
26781
26782 /* Return Q_REGS if the operand is in memory. */
26783 if (regno == -1)
26784 return Q_REGS;
26785 }
26786
26787 return NO_REGS;
26788 }
26789
26790 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
26791
26792 static bool
26793 ix86_class_likely_spilled_p (reg_class_t rclass)
26794 {
26795 switch (rclass)
26796 {
26797 case AREG:
26798 case DREG:
26799 case CREG:
26800 case BREG:
26801 case AD_REGS:
26802 case SIREG:
26803 case DIREG:
26804 case SSE_FIRST_REG:
26805 case FP_TOP_REG:
26806 case FP_SECOND_REG:
26807 return true;
26808
26809 default:
26810 break;
26811 }
26812
26813 return false;
26814 }
26815
26816 /* If we are copying between general and FP registers, we need a memory
26817 location. The same is true for SSE and MMX registers.
26818
26819 To optimize register_move_cost performance, allow inline variant.
26820
26821    The macro can't work reliably when one of the CLASSES is a class containing
26822    registers from multiple units (SSE, MMX, integer).  We avoid this by never
26823    combining those units in a single alternative in the machine description.
26824    Ensure that this constraint holds to avoid unexpected surprises.
26825
26826 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26827 enforce these sanity checks. */
26828
26829 static inline bool
26830 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26831 enum machine_mode mode, int strict)
26832 {
26833 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26834 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26835 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26836 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26837 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26838 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26839 {
26840 gcc_assert (!strict);
26841 return true;
26842 }
26843
26844 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26845 return true;
26846
26847   /* ??? This is a lie.  We do have moves between mmx/general and between
26848      mmx/sse2.  But by saying we need secondary memory we discourage the
26849      register allocator from using the mmx registers unless needed.  */
26850 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26851 return true;
26852
26853 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26854 {
26855 /* SSE1 doesn't have any direct moves from other classes. */
26856 if (!TARGET_SSE2)
26857 return true;
26858
26859 /* If the target says that inter-unit moves are more expensive
26860 than moving through memory, then don't generate them. */
26861 if (!TARGET_INTER_UNIT_MOVES)
26862 return true;
26863
26864 /* Between SSE and general, we have moves no larger than word size. */
26865 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26866 return true;
26867 }
26868
26869 return false;
26870 }
26871
26872 bool
26873 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26874 enum machine_mode mode, int strict)
26875 {
26876 return inline_secondary_memory_needed (class1, class2, mode, strict);
26877 }
26878
26879 /* Return true if the registers in CLASS cannot represent the change from
26880 modes FROM to TO. */
26881
26882 bool
26883 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26884 enum reg_class regclass)
26885 {
26886 if (from == to)
26887 return false;
26888
26889 /* x87 registers can't do subreg at all, as all values are reformatted
26890 to extended precision. */
26891 if (MAYBE_FLOAT_CLASS_P (regclass))
26892 return true;
26893
26894 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26895 {
26896 /* Vector registers do not support QI or HImode loads. If we don't
26897 disallow a change to these modes, reload will assume it's ok to
26898 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26899 the vec_dupv4hi pattern. */
26900 if (GET_MODE_SIZE (from) < 4)
26901 return true;
26902
26903 /* Vector registers do not support subreg with nonzero offsets, which
26904 are otherwise valid for integer registers. Since we can't see
26905 whether we have a nonzero offset from here, prohibit all
26906 nonparadoxical subregs changing size. */
26907 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26908 return true;
26909 }
26910
26911 return false;
26912 }
26913
26914 /* Return the cost of moving data of mode M between a
26915 register and memory. A value of 2 is the default; this cost is
26916 relative to those in `REGISTER_MOVE_COST'.
26917
26918    This function is used extensively by register_move_cost, which is used to
26919    build tables at startup, so keep it inline.  IN is 0 for a store, 1 for a
26920    load, and 2 to return the maximum of the load and store costs.
26921 
26922    If moving between registers and memory is more expensive than
26923    between two registers, you should define this macro to express the
26924    relative cost.
26925 
26926    Also model the increased cost of moving QImode registers in non-Q_REGS
26927    classes.
26928  */
26929 static inline int
26930 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26931 int in)
26932 {
26933 int cost;
26934 if (FLOAT_CLASS_P (regclass))
26935 {
26936 int index;
26937 switch (mode)
26938 {
26939 case SFmode:
26940 index = 0;
26941 break;
26942 case DFmode:
26943 index = 1;
26944 break;
26945 case XFmode:
26946 index = 2;
26947 break;
26948 default:
26949 return 100;
26950 }
26951 if (in == 2)
26952 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26953 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26954 }
26955 if (SSE_CLASS_P (regclass))
26956 {
26957 int index;
26958 switch (GET_MODE_SIZE (mode))
26959 {
26960 case 4:
26961 index = 0;
26962 break;
26963 case 8:
26964 index = 1;
26965 break;
26966 case 16:
26967 index = 2;
26968 break;
26969 default:
26970 return 100;
26971 }
26972 if (in == 2)
26973 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26974 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26975 }
26976 if (MMX_CLASS_P (regclass))
26977 {
26978 int index;
26979 switch (GET_MODE_SIZE (mode))
26980 {
26981 case 4:
26982 index = 0;
26983 break;
26984 case 8:
26985 index = 1;
26986 break;
26987 default:
26988 return 100;
26989 }
26990       if (in == 2)
26991 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26992 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26993 }
26994 switch (GET_MODE_SIZE (mode))
26995 {
26996 case 1:
26997 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26998 {
26999 if (!in)
27000 return ix86_cost->int_store[0];
27001 if (TARGET_PARTIAL_REG_DEPENDENCY
27002 && optimize_function_for_speed_p (cfun))
27003 cost = ix86_cost->movzbl_load;
27004 else
27005 cost = ix86_cost->int_load[0];
27006 if (in == 2)
27007 return MAX (cost, ix86_cost->int_store[0]);
27008 return cost;
27009 }
27010 else
27011 {
27012 if (in == 2)
27013 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
27014 if (in)
27015 return ix86_cost->movzbl_load;
27016 else
27017 return ix86_cost->int_store[0] + 4;
27018 }
27019 break;
27020 case 2:
27021 if (in == 2)
27022 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
27023 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
27024 default:
27025       /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
27026 if (mode == TFmode)
27027 mode = XFmode;
27028 if (in == 2)
27029 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
27030 else if (in)
27031 cost = ix86_cost->int_load[2];
27032 else
27033 cost = ix86_cost->int_store[2];
27034 return (cost * (((int) GET_MODE_SIZE (mode)
27035 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
27036 }
27037 }
27038
27039 static int
27040 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
27041 bool in)
27042 {
27043 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
27044 }
27045
27046
27047 /* Return the cost of moving data from a register in class CLASS1 to
27048 one in class CLASS2.
27049
27050 It is not required that the cost always equal 2 when FROM is the same as TO;
27051 on some machines it is expensive to move between registers if they are not
27052 general registers. */
27053
27054 static int
27055 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
27056 reg_class_t class2_i)
27057 {
27058 enum reg_class class1 = (enum reg_class) class1_i;
27059 enum reg_class class2 = (enum reg_class) class2_i;
27060
27061 /* In case we require secondary memory, compute cost of the store followed
27062 by load. In order to avoid bad register allocation choices, we need
27063 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
27064
27065 if (inline_secondary_memory_needed (class1, class2, mode, 0))
27066 {
27067 int cost = 1;
27068
27069 cost += inline_memory_move_cost (mode, class1, 2);
27070 cost += inline_memory_move_cost (mode, class2, 2);
27071
27072       /* When copying from a general purpose register we may emit multiple
27073          stores followed by a single load, causing a memory size mismatch stall.
27074          Count this as an arbitrarily high cost of 20.  */
27075 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
27076 cost += 20;
27077
27078 /* In the case of FP/MMX moves, the registers actually overlap, and we
27079 have to switch modes in order to treat them differently. */
27080 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
27081 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
27082 cost += 20;
27083
27084 return cost;
27085 }
27086
27087 /* Moves between SSE/MMX and integer unit are expensive. */
27088 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
27089 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
27090
27091     /* ??? By keeping the returned value relatively high, we limit the number
27092        of moves between integer and MMX/SSE registers for all targets.
27093        Additionally, a high value works around a problem with x86_modes_tieable_p(),
27094        where integer modes in MMX/SSE registers are not tieable
27095        because of missing QImode and HImode moves to, from or between
27096        MMX/SSE registers.  */
27097 return MAX (8, ix86_cost->mmxsse_to_integer);
27098
27099 if (MAYBE_FLOAT_CLASS_P (class1))
27100 return ix86_cost->fp_move;
27101 if (MAYBE_SSE_CLASS_P (class1))
27102 return ix86_cost->sse_move;
27103 if (MAYBE_MMX_CLASS_P (class1))
27104 return ix86_cost->mmx_move;
27105 return 2;
27106 }
27107
27108 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
27109
27110 bool
27111 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
27112 {
27113 /* Flags and only flags can only hold CCmode values. */
27114 if (CC_REGNO_P (regno))
27115 return GET_MODE_CLASS (mode) == MODE_CC;
27116 if (GET_MODE_CLASS (mode) == MODE_CC
27117 || GET_MODE_CLASS (mode) == MODE_RANDOM
27118 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
27119 return 0;
27120 if (FP_REGNO_P (regno))
27121 return VALID_FP_MODE_P (mode);
27122 if (SSE_REGNO_P (regno))
27123 {
27124 /* We implement the move patterns for all vector modes into and
27125 out of SSE registers, even when no operation instructions
27126 are available. OImode move is available only when AVX is
27127 enabled. */
27128 return ((TARGET_AVX && mode == OImode)
27129 || VALID_AVX256_REG_MODE (mode)
27130 || VALID_SSE_REG_MODE (mode)
27131 || VALID_SSE2_REG_MODE (mode)
27132 || VALID_MMX_REG_MODE (mode)
27133 || VALID_MMX_REG_MODE_3DNOW (mode));
27134 }
27135 if (MMX_REGNO_P (regno))
27136 {
27137 /* We implement the move patterns for 3DNOW modes even in MMX mode,
27138 so if the register is available at all, then we can move data of
27139 the given mode into or out of it. */
27140 return (VALID_MMX_REG_MODE (mode)
27141 || VALID_MMX_REG_MODE_3DNOW (mode));
27142 }
27143
27144 if (mode == QImode)
27145 {
27146       /* Take care with QImode values - they can be in non-QI regs,
27147          but then they do cause partial register stalls.  */
27148 if (regno <= BX_REG || TARGET_64BIT)
27149 return 1;
27150 if (!TARGET_PARTIAL_REG_STALL)
27151 return 1;
27152 return reload_in_progress || reload_completed;
27153 }
27154 /* We handle both integer and floats in the general purpose registers. */
27155 else if (VALID_INT_MODE_P (mode))
27156 return 1;
27157 else if (VALID_FP_MODE_P (mode))
27158 return 1;
27159 else if (VALID_DFP_MODE_P (mode))
27160 return 1;
27161 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
27162 on to use that value in smaller contexts, this can easily force a
27163 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
27164 supporting DImode, allow it. */
27165 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
27166 return 1;
27167
27168 return 0;
27169 }
27170
27171 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
27172 tieable integer mode. */
27173
27174 static bool
27175 ix86_tieable_integer_mode_p (enum machine_mode mode)
27176 {
27177 switch (mode)
27178 {
27179 case HImode:
27180 case SImode:
27181 return true;
27182
27183 case QImode:
27184 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
27185
27186 case DImode:
27187 return TARGET_64BIT;
27188
27189 default:
27190 return false;
27191 }
27192 }
27193
27194 /* Return true if MODE1 is accessible in a register that can hold MODE2
27195 without copying. That is, all register classes that can hold MODE2
27196 can also hold MODE1. */
27197
27198 bool
27199 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
27200 {
27201 if (mode1 == mode2)
27202 return true;
27203
27204 if (ix86_tieable_integer_mode_p (mode1)
27205 && ix86_tieable_integer_mode_p (mode2))
27206 return true;
27207
27208 /* MODE2 being XFmode implies fp stack or general regs, which means we
27209 can tie any smaller floating point modes to it. Note that we do not
27210 tie this with TFmode. */
27211 if (mode2 == XFmode)
27212 return mode1 == SFmode || mode1 == DFmode;
27213
27214 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
27215 that we can tie it with SFmode. */
27216 if (mode2 == DFmode)
27217 return mode1 == SFmode;
27218
27219 /* If MODE2 is only appropriate for an SSE register, then tie with
27220 any other mode acceptable to SSE registers. */
27221 if (GET_MODE_SIZE (mode2) == 16
27222 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
27223 return (GET_MODE_SIZE (mode1) == 16
27224 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
27225
27226 /* If MODE2 is appropriate for an MMX register, then tie
27227 with any other mode acceptable to MMX registers. */
27228 if (GET_MODE_SIZE (mode2) == 8
27229 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
27230 return (GET_MODE_SIZE (mode1) == 8
27231 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
27232
27233 return false;
27234 }
27235
27236 /* Compute a (partial) cost for rtx X. Return true if the complete
27237 cost has been computed, and false if subexpressions should be
27238 scanned. In either case, *TOTAL contains the cost result. */
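/* Returning true means the operands have already been accounted for
   (e.g. the multiply-by-constant and LEA-style PLUS cases below add the
   operand costs into *TOTAL themselves); returning false lets the
   generic code recurse into the subexpressions.  */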
27239
27240 static bool
27241 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
27242 {
27243 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
27244 enum machine_mode mode = GET_MODE (x);
27245 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
27246
27247 switch (code)
27248 {
27249 case CONST_INT:
27250 case CONST:
27251 case LABEL_REF:
27252 case SYMBOL_REF:
27253 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
27254 *total = 3;
27255 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
27256 *total = 2;
27257 else if (flag_pic && SYMBOLIC_CONST (x)
27258 && (!TARGET_64BIT
27259 		   || (GET_CODE (x) != LABEL_REF
27260 && (GET_CODE (x) != SYMBOL_REF
27261 || !SYMBOL_REF_LOCAL_P (x)))))
27262 *total = 1;
27263 else
27264 *total = 0;
27265 return true;
27266
27267 case CONST_DOUBLE:
27268 if (mode == VOIDmode)
27269 *total = 0;
27270 else
27271 switch (standard_80387_constant_p (x))
27272 {
27273 case 1: /* 0.0 */
27274 *total = 1;
27275 break;
27276 default: /* Other constants */
27277 *total = 2;
27278 break;
27279 case 0:
27280 case -1:
27281 /* Start with (MEM (SYMBOL_REF)), since that's where
27282 it'll probably end up. Add a penalty for size. */
27283 *total = (COSTS_N_INSNS (1)
27284 + (flag_pic != 0 && !TARGET_64BIT)
27285 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
27286 break;
27287 }
27288 return true;
27289
27290 case ZERO_EXTEND:
27291       /* Zero extension is often completely free on x86_64, so make
27292 	 it as cheap as possible.  */
27293 if (TARGET_64BIT && mode == DImode
27294 && GET_MODE (XEXP (x, 0)) == SImode)
27295 *total = 1;
27296 else if (TARGET_ZERO_EXTEND_WITH_AND)
27297 *total = cost->add;
27298 else
27299 *total = cost->movzx;
27300 return false;
27301
27302 case SIGN_EXTEND:
27303 *total = cost->movsx;
27304 return false;
27305
27306 case ASHIFT:
27307 if (CONST_INT_P (XEXP (x, 1))
27308 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
27309 {
27310 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
27311 if (value == 1)
27312 {
27313 *total = cost->add;
27314 return false;
27315 }
27316 if ((value == 2 || value == 3)
27317 && cost->lea <= cost->shift_const)
27318 {
27319 *total = cost->lea;
27320 return false;
27321 }
27322 }
27323 /* FALLTHRU */
27324
27325 case ROTATE:
27326 case ASHIFTRT:
27327 case LSHIFTRT:
27328 case ROTATERT:
27329 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
27330 {
27331 if (CONST_INT_P (XEXP (x, 1)))
27332 {
27333 if (INTVAL (XEXP (x, 1)) > 32)
27334 *total = cost->shift_const + COSTS_N_INSNS (2);
27335 else
27336 *total = cost->shift_const * 2;
27337 }
27338 else
27339 {
27340 if (GET_CODE (XEXP (x, 1)) == AND)
27341 *total = cost->shift_var * 2;
27342 else
27343 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
27344 }
27345 }
27346 else
27347 {
27348 if (CONST_INT_P (XEXP (x, 1)))
27349 *total = cost->shift_const;
27350 else
27351 *total = cost->shift_var;
27352 }
27353 return false;
27354
27355 case MULT:
27356 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27357 {
27358 /* ??? SSE scalar cost should be used here. */
27359 *total = cost->fmul;
27360 return false;
27361 }
27362 else if (X87_FLOAT_MODE_P (mode))
27363 {
27364 *total = cost->fmul;
27365 return false;
27366 }
27367 else if (FLOAT_MODE_P (mode))
27368 {
27369 /* ??? SSE vector cost should be used here. */
27370 *total = cost->fmul;
27371 return false;
27372 }
27373 else
27374 {
27375 rtx op0 = XEXP (x, 0);
27376 rtx op1 = XEXP (x, 1);
27377 int nbits;
27378 if (CONST_INT_P (XEXP (x, 1)))
27379 {
27380 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
27381 for (nbits = 0; value != 0; value &= value - 1)
27382 nbits++;
27383 }
27384 else
27385 /* This is arbitrary. */
27386 nbits = 7;
27387
27388 /* Compute costs correctly for widening multiplication. */
27389 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
27390 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
27391 == GET_MODE_SIZE (mode))
27392 {
27393 int is_mulwiden = 0;
27394 enum machine_mode inner_mode = GET_MODE (op0);
27395
27396 if (GET_CODE (op0) == GET_CODE (op1))
27397 is_mulwiden = 1, op1 = XEXP (op1, 0);
27398 else if (CONST_INT_P (op1))
27399 {
27400 if (GET_CODE (op0) == SIGN_EXTEND)
27401 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
27402 == INTVAL (op1);
27403 else
27404 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
27405 }
27406
27407 if (is_mulwiden)
27408 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
27409 }
27410
27411 *total = (cost->mult_init[MODE_INDEX (mode)]
27412 + nbits * cost->mult_bit
27413 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
27414
27415 return true;
27416 }
27417
27418 case DIV:
27419 case UDIV:
27420 case MOD:
27421 case UMOD:
27422 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27423 /* ??? SSE cost should be used here. */
27424 *total = cost->fdiv;
27425 else if (X87_FLOAT_MODE_P (mode))
27426 *total = cost->fdiv;
27427 else if (FLOAT_MODE_P (mode))
27428 /* ??? SSE vector cost should be used here. */
27429 *total = cost->fdiv;
27430 else
27431 *total = cost->divide[MODE_INDEX (mode)];
27432 return false;
27433
27434 case PLUS:
27435 if (GET_MODE_CLASS (mode) == MODE_INT
27436 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
27437 {
27438 if (GET_CODE (XEXP (x, 0)) == PLUS
27439 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
27440 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
27441 && CONSTANT_P (XEXP (x, 1)))
27442 {
27443 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
27444 if (val == 2 || val == 4 || val == 8)
27445 {
27446 *total = cost->lea;
27447 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27448 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
27449 outer_code, speed);
27450 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27451 return true;
27452 }
27453 }
27454 else if (GET_CODE (XEXP (x, 0)) == MULT
27455 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
27456 {
27457 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
27458 if (val == 2 || val == 4 || val == 8)
27459 {
27460 *total = cost->lea;
27461 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27462 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27463 return true;
27464 }
27465 }
27466 else if (GET_CODE (XEXP (x, 0)) == PLUS)
27467 {
27468 *total = cost->lea;
27469 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27470 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27471 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27472 return true;
27473 }
27474 }
27475 /* FALLTHRU */
27476
27477 case MINUS:
27478 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27479 {
27480 /* ??? SSE cost should be used here. */
27481 *total = cost->fadd;
27482 return false;
27483 }
27484 else if (X87_FLOAT_MODE_P (mode))
27485 {
27486 *total = cost->fadd;
27487 return false;
27488 }
27489 else if (FLOAT_MODE_P (mode))
27490 {
27491 /* ??? SSE vector cost should be used here. */
27492 *total = cost->fadd;
27493 return false;
27494 }
27495 /* FALLTHRU */
27496
27497 case AND:
27498 case IOR:
27499 case XOR:
27500 if (!TARGET_64BIT && mode == DImode)
27501 {
27502 *total = (cost->add * 2
27503 + (rtx_cost (XEXP (x, 0), outer_code, speed)
27504 << (GET_MODE (XEXP (x, 0)) != DImode))
27505 + (rtx_cost (XEXP (x, 1), outer_code, speed)
27506 << (GET_MODE (XEXP (x, 1)) != DImode)));
27507 return true;
27508 }
27509 /* FALLTHRU */
27510
27511 case NEG:
27512 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27513 {
27514 /* ??? SSE cost should be used here. */
27515 *total = cost->fchs;
27516 return false;
27517 }
27518 else if (X87_FLOAT_MODE_P (mode))
27519 {
27520 *total = cost->fchs;
27521 return false;
27522 }
27523 else if (FLOAT_MODE_P (mode))
27524 {
27525 /* ??? SSE vector cost should be used here. */
27526 *total = cost->fchs;
27527 return false;
27528 }
27529 /* FALLTHRU */
27530
27531 case NOT:
27532 if (!TARGET_64BIT && mode == DImode)
27533 *total = cost->add * 2;
27534 else
27535 *total = cost->add;
27536 return false;
27537
27538 case COMPARE:
27539 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
27540 && XEXP (XEXP (x, 0), 1) == const1_rtx
27541 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
27542 && XEXP (x, 1) == const0_rtx)
27543 {
27544 /* This kind of construct is implemented using test[bwl].
27545 Treat it as if we had an AND. */
27546 *total = (cost->add
27547 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
27548 + rtx_cost (const1_rtx, outer_code, speed));
27549 return true;
27550 }
27551 return false;
27552
27553 case FLOAT_EXTEND:
27554 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
27555 *total = 0;
27556 return false;
27557
27558 case ABS:
27559 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27560 /* ??? SSE cost should be used here. */
27561 *total = cost->fabs;
27562 else if (X87_FLOAT_MODE_P (mode))
27563 *total = cost->fabs;
27564 else if (FLOAT_MODE_P (mode))
27565 /* ??? SSE vector cost should be used here. */
27566 *total = cost->fabs;
27567 return false;
27568
27569 case SQRT:
27570 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27571 /* ??? SSE cost should be used here. */
27572 *total = cost->fsqrt;
27573 else if (X87_FLOAT_MODE_P (mode))
27574 *total = cost->fsqrt;
27575 else if (FLOAT_MODE_P (mode))
27576 /* ??? SSE vector cost should be used here. */
27577 *total = cost->fsqrt;
27578 return false;
27579
27580 case UNSPEC:
27581 if (XINT (x, 1) == UNSPEC_TP)
27582 *total = 0;
27583 return false;
27584
27585 case VEC_SELECT:
27586 case VEC_CONCAT:
27587 case VEC_MERGE:
27588 case VEC_DUPLICATE:
27589       /* ??? Assume all of these vector manipulation patterns are
27590 	 recognizable, in which case they all pretty much have the
27591 	 same cost.  */
27592 *total = COSTS_N_INSNS (1);
27593 return true;
27594
27595 default:
27596 return false;
27597 }
27598 }
27599
27600 #if TARGET_MACHO
27601
27602 static int current_machopic_label_num;
27603
27604 /* Given a symbol name and its associated stub, write out the
27605 definition of the stub. */
27606
27607 void
27608 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27609 {
27610 unsigned int length;
27611 char *binder_name, *symbol_name, lazy_ptr_name[32];
27612 int label = ++current_machopic_label_num;
27613
27614 /* For 64-bit we shouldn't get here. */
27615 gcc_assert (!TARGET_64BIT);
27616
27617 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27618 symb = targetm.strip_name_encoding (symb);
27619
27620 length = strlen (stub);
27621 binder_name = XALLOCAVEC (char, length + 32);
27622 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27623
27624 length = strlen (symb);
27625 symbol_name = XALLOCAVEC (char, length + 32);
27626 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27627
27628 sprintf (lazy_ptr_name, "L%d$lz", label);
27629
27630 if (MACHOPIC_PURE)
27631 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27632 else
27633 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27634
27635 fprintf (file, "%s:\n", stub);
27636 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27637
27638 if (MACHOPIC_PURE)
27639 {
27640 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27641 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27642 fprintf (file, "\tjmp\t*%%edx\n");
27643 }
27644 else
27645 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27646
27647 fprintf (file, "%s:\n", binder_name);
27648
27649 if (MACHOPIC_PURE)
27650 {
27651 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27652 fputs ("\tpushl\t%eax\n", file);
27653 }
27654 else
27655 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27656
27657 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
27658
27659 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27660 fprintf (file, "%s:\n", lazy_ptr_name);
27661 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27662 fprintf (file, ASM_LONG "%s\n", binder_name);
27663 }
27664 #endif /* TARGET_MACHO */
27665
27666 /* Order the registers for register allocator. */
27667
27668 void
27669 x86_order_regs_for_local_alloc (void)
27670 {
27671 int pos = 0;
27672 int i;
27673
27674 /* First allocate the local general purpose registers. */
27675 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27676 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27677 reg_alloc_order [pos++] = i;
27678
27679 /* Global general purpose registers. */
27680 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27681 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27682 reg_alloc_order [pos++] = i;
27683
27684 /* x87 registers come first in case we are doing FP math
27685 using them. */
27686 if (!TARGET_SSE_MATH)
27687 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27688 reg_alloc_order [pos++] = i;
27689
27690 /* SSE registers. */
27691 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27692 reg_alloc_order [pos++] = i;
27693 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27694 reg_alloc_order [pos++] = i;
27695
27696 /* x87 registers. */
27697 if (TARGET_SSE_MATH)
27698 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27699 reg_alloc_order [pos++] = i;
27700
27701 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27702 reg_alloc_order [pos++] = i;
27703
27704   /* Initialize the rest of the array, as we do not allocate some registers
27705      at all.  */
27706 while (pos < FIRST_PSEUDO_REGISTER)
27707 reg_alloc_order [pos++] = 0;
27708 }
27709
27710 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
27711 struct attribute_spec.handler. */
27712 static tree
27713 ix86_handle_abi_attribute (tree *node, tree name,
27714 tree args ATTRIBUTE_UNUSED,
27715 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27716 {
27717 if (TREE_CODE (*node) != FUNCTION_TYPE
27718 && TREE_CODE (*node) != METHOD_TYPE
27719 && TREE_CODE (*node) != FIELD_DECL
27720 && TREE_CODE (*node) != TYPE_DECL)
27721 {
27722 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27723 name);
27724 *no_add_attrs = true;
27725 return NULL_TREE;
27726 }
27727 if (!TARGET_64BIT)
27728 {
27729 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27730 name);
27731 *no_add_attrs = true;
27732 return NULL_TREE;
27733 }
27734
27735 /* Can combine regparm with all attributes but fastcall. */
27736 if (is_attribute_p ("ms_abi", name))
27737 {
27738 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27739 {
27740 error ("ms_abi and sysv_abi attributes are not compatible");
27741 }
27742
27743 return NULL_TREE;
27744 }
27745 else if (is_attribute_p ("sysv_abi", name))
27746 {
27747 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27748 {
27749 error ("ms_abi and sysv_abi attributes are not compatible");
27750 }
27751
27752 return NULL_TREE;
27753 }
27754
27755 return NULL_TREE;
27756 }
27757
27758 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27759 struct attribute_spec.handler. */
27760 static tree
27761 ix86_handle_struct_attribute (tree *node, tree name,
27762 tree args ATTRIBUTE_UNUSED,
27763 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27764 {
27765 tree *type = NULL;
27766 if (DECL_P (*node))
27767 {
27768 if (TREE_CODE (*node) == TYPE_DECL)
27769 type = &TREE_TYPE (*node);
27770 }
27771 else
27772 type = node;
27773
27774 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27775 || TREE_CODE (*type) == UNION_TYPE)))
27776 {
27777 warning (OPT_Wattributes, "%qE attribute ignored",
27778 name);
27779 *no_add_attrs = true;
27780 }
27781
27782 else if ((is_attribute_p ("ms_struct", name)
27783 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27784 || ((is_attribute_p ("gcc_struct", name)
27785 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27786 {
27787 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27788 name);
27789 *no_add_attrs = true;
27790 }
27791
27792 return NULL_TREE;
27793 }
27794
27795 static tree
27796 ix86_handle_fndecl_attribute (tree *node, tree name,
27797 tree args ATTRIBUTE_UNUSED,
27798 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27799 {
27800 if (TREE_CODE (*node) != FUNCTION_DECL)
27801 {
27802 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27803 name);
27804 *no_add_attrs = true;
27805 }
27806 return NULL_TREE;
27807 }
27808
27809 static bool
27810 ix86_ms_bitfield_layout_p (const_tree record_type)
27811 {
27812 return ((TARGET_MS_BITFIELD_LAYOUT
27813 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27814 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27815 }
27816
27817 /* Returns an expression indicating where the this parameter is
27818 located on entry to the FUNCTION. */
27819
27820 static rtx
27821 x86_this_parameter (tree function)
27822 {
27823 tree type = TREE_TYPE (function);
27824 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27825 int nregs;
27826
27827 if (TARGET_64BIT)
27828 {
27829 const int *parm_regs;
27830
27831 if (ix86_function_type_abi (type) == MS_ABI)
27832 parm_regs = x86_64_ms_abi_int_parameter_registers;
27833 else
27834 parm_regs = x86_64_int_parameter_registers;
27835 return gen_rtx_REG (DImode, parm_regs[aggr]);
27836 }
27837
27838 nregs = ix86_function_regparm (type, function);
27839
27840 if (nregs > 0 && !stdarg_p (type))
27841 {
27842 int regno;
27843
27844 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27845 regno = aggr ? DX_REG : CX_REG;
27846 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27847 {
27848 regno = CX_REG;
27849 if (aggr)
27850 return gen_rtx_MEM (SImode,
27851 plus_constant (stack_pointer_rtx, 4));
27852 }
27853 else
27854 {
27855 regno = AX_REG;
27856 if (aggr)
27857 {
27858 regno = DX_REG;
27859 if (nregs == 1)
27860 return gen_rtx_MEM (SImode,
27861 plus_constant (stack_pointer_rtx, 4));
27862 }
27863 }
27864 return gen_rtx_REG (SImode, regno);
27865 }
27866
27867 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27868 }
27869
27870 /* Determine whether x86_output_mi_thunk can succeed. */
27871
27872 static bool
27873 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27874 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27875 HOST_WIDE_INT vcall_offset, const_tree function)
27876 {
27877 /* 64-bit can handle anything. */
27878 if (TARGET_64BIT)
27879 return true;
27880
27881 /* For 32-bit, everything's fine if we have one free register. */
27882 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27883 return true;
27884
27885 /* Need a free register for vcall_offset. */
27886 if (vcall_offset)
27887 return false;
27888
27889 /* Need a free register for GOT references. */
27890 if (flag_pic && !targetm.binds_local_p (function))
27891 return false;
27892
27893 /* Otherwise ok. */
27894 return true;
27895 }
27896
27897 /* Output the assembler code for a thunk function. THUNK_DECL is the
27898 declaration for the thunk function itself, FUNCTION is the decl for
27899 the target function. DELTA is an immediate constant offset to be
27900 added to THIS. If VCALL_OFFSET is nonzero, the word at
27901 *(*this + vcall_offset) should be added to THIS. */
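/* The emitted sequence only adjusts THIS (adding DELTA and, if
   requested, the value loaded from *(*this + vcall_offset)) and then
   tail-calls FUNCTION, so no frame is set up.  */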
27902
27903 static void
27904 x86_output_mi_thunk (FILE *file,
27905 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27906 HOST_WIDE_INT vcall_offset, tree function)
27907 {
27908 rtx xops[3];
27909 rtx this_param = x86_this_parameter (function);
27910 rtx this_reg, tmp;
27911
27912 /* Make sure unwind info is emitted for the thunk if needed. */
27913 final_start_function (emit_barrier (), file, 1);
27914
27915 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27916 pull it in now and let DELTA benefit. */
27917 if (REG_P (this_param))
27918 this_reg = this_param;
27919 else if (vcall_offset)
27920 {
27921 /* Put the this parameter into %eax. */
27922 xops[0] = this_param;
27923 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27924 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27925 }
27926 else
27927 this_reg = NULL_RTX;
27928
27929 /* Adjust the this parameter by a fixed constant. */
27930 if (delta)
27931 {
27932 xops[0] = GEN_INT (delta);
27933 xops[1] = this_reg ? this_reg : this_param;
27934 if (TARGET_64BIT)
27935 {
27936 if (!x86_64_general_operand (xops[0], DImode))
27937 {
27938 tmp = gen_rtx_REG (DImode, R10_REG);
27939 xops[1] = tmp;
27940 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27941 xops[0] = tmp;
27942 xops[1] = this_param;
27943 }
27944 if (x86_maybe_negate_const_int (&xops[0], DImode))
27945 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27946 else
27947 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27948 }
27949 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27950 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27951 else
27952 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27953 }
27954
27955 /* Adjust the this parameter by a value stored in the vtable. */
27956 if (vcall_offset)
27957 {
27958 if (TARGET_64BIT)
27959 tmp = gen_rtx_REG (DImode, R10_REG);
27960 else
27961 {
27962 int tmp_regno = CX_REG;
27963 if (lookup_attribute ("fastcall",
27964 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27965 || lookup_attribute ("thiscall",
27966 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27967 tmp_regno = AX_REG;
27968 tmp = gen_rtx_REG (SImode, tmp_regno);
27969 }
27970
27971 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27972 xops[1] = tmp;
27973 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27974
27975 /* Adjust the this parameter. */
27976 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27977 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27978 {
27979 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27980 xops[0] = GEN_INT (vcall_offset);
27981 xops[1] = tmp2;
27982 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27983 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27984 }
27985 xops[1] = this_reg;
27986 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27987 }
27988
27989 /* If necessary, drop THIS back to its stack slot. */
27990 if (this_reg && this_reg != this_param)
27991 {
27992 xops[0] = this_reg;
27993 xops[1] = this_param;
27994 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27995 }
27996
27997 xops[0] = XEXP (DECL_RTL (function), 0);
27998 if (TARGET_64BIT)
27999 {
28000 if (!flag_pic || targetm.binds_local_p (function))
28001 output_asm_insn ("jmp\t%P0", xops);
28002 /* All thunks should be in the same object as their target,
28003 and thus binds_local_p should be true. */
28004 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
28005 gcc_unreachable ();
28006 else
28007 {
28008 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
28009 tmp = gen_rtx_CONST (Pmode, tmp);
28010 tmp = gen_rtx_MEM (QImode, tmp);
28011 xops[0] = tmp;
28012 output_asm_insn ("jmp\t%A0", xops);
28013 }
28014 }
28015 else
28016 {
28017 if (!flag_pic || targetm.binds_local_p (function))
28018 output_asm_insn ("jmp\t%P0", xops);
28019 else
28020 #if TARGET_MACHO
28021 if (TARGET_MACHO)
28022 {
28023 rtx sym_ref = XEXP (DECL_RTL (function), 0);
28024 if (TARGET_MACHO_BRANCH_ISLANDS)
28025 sym_ref = (gen_rtx_SYMBOL_REF
28026 (Pmode,
28027 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
28028 tmp = gen_rtx_MEM (QImode, sym_ref);
28029 xops[0] = tmp;
28030 output_asm_insn ("jmp\t%0", xops);
28031 }
28032 else
28033 #endif /* TARGET_MACHO */
28034 {
28035 tmp = gen_rtx_REG (SImode, CX_REG);
28036 output_set_got (tmp, NULL_RTX);
28037
28038 xops[1] = tmp;
28039 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
28040 output_asm_insn ("jmp\t{*}%1", xops);
28041 }
28042 }
28043 final_end_function ();
28044 }
28045
28046 static void
28047 x86_file_start (void)
28048 {
28049 default_file_start ();
28050 #if TARGET_MACHO
28051 darwin_file_start ();
28052 #endif
28053 if (X86_FILE_START_VERSION_DIRECTIVE)
28054 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
28055 if (X86_FILE_START_FLTUSED)
28056 fputs ("\t.global\t__fltused\n", asm_out_file);
28057 if (ix86_asm_dialect == ASM_INTEL)
28058 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
28059 }
28060
28061 int
28062 x86_field_alignment (tree field, int computed)
28063 {
28064 enum machine_mode mode;
28065 tree type = TREE_TYPE (field);
28066
28067 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
28068 return computed;
28069 mode = TYPE_MODE (strip_array_types (type));
28070 if (mode == DFmode || mode == DCmode
28071 || GET_MODE_CLASS (mode) == MODE_INT
28072 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
28073 return MIN (32, computed);
28074 return computed;
28075 }
28076
28077 /* Output assembler code to FILE to increment profiler label # LABELNO
28078 for profiling a function entry. */
28079 void
28080 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
28081 {
28082 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
28083 : MCOUNT_NAME);
28084
28085 if (TARGET_64BIT)
28086 {
28087 #ifndef NO_PROFILE_COUNTERS
28088 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
28089 #endif
28090
28091 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
28092 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
28093 else
28094 fprintf (file, "\tcall\t%s\n", mcount_name);
28095 }
28096 else if (flag_pic)
28097 {
28098 #ifndef NO_PROFILE_COUNTERS
28099 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
28100 LPREFIX, labelno);
28101 #endif
28102 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
28103 }
28104 else
28105 {
28106 #ifndef NO_PROFILE_COUNTERS
28107 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
28108 LPREFIX, labelno);
28109 #endif
28110 fprintf (file, "\tcall\t%s\n", mcount_name);
28111 }
28112 }
28113
28114 /* We don't have exact information about the insn sizes, but we may assume
28115 quite safely that we are informed about all 1 byte insns and memory
28116 address sizes. This is enough to eliminate unnecessary padding in
28117 99% of cases. */
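/* The estimate is a lower bound: asm statements and alignment directives
   count as 0 bytes, and a direct call to a symbol counts as the 5 bytes
   of a call rel32.  */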
28118
28119 static int
28120 min_insn_size (rtx insn)
28121 {
28122 int l = 0, len;
28123
28124 if (!INSN_P (insn) || !active_insn_p (insn))
28125 return 0;
28126
28127   /* Discard alignments we've emitted, and jump table data.  */
28128 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
28129 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
28130 return 0;
28131 if (JUMP_TABLE_DATA_P (insn))
28132 return 0;
28133
28134   /* Important case - calls are always 5 bytes.
28135      It is common to have many calls in a row.  */
28136 if (CALL_P (insn)
28137 && symbolic_reference_mentioned_p (PATTERN (insn))
28138 && !SIBLING_CALL_P (insn))
28139 return 5;
28140 len = get_attr_length (insn);
28141 if (len <= 1)
28142 return 1;
28143
28144 /* For normal instructions we rely on get_attr_length being exact,
28145 with a few exceptions. */
28146 if (!JUMP_P (insn))
28147 {
28148 enum attr_type type = get_attr_type (insn);
28149
28150 switch (type)
28151 {
28152 case TYPE_MULTI:
28153 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
28154 || asm_noperands (PATTERN (insn)) >= 0)
28155 return 0;
28156 break;
28157 case TYPE_OTHER:
28158 case TYPE_FCMP:
28159 break;
28160 default:
28161 /* Otherwise trust get_attr_length. */
28162 return len;
28163 }
28164
28165 l = get_attr_length_address (insn);
28166 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
28167 l = 4;
28168 }
28169 if (l)
28170 return 1+l;
28171 else
28172 return 2;
28173 }
28174
28175 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28176
28177 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
28178 window. */
28179
28180 static void
28181 ix86_avoid_jump_mispredicts (void)
28182 {
28183 rtx insn, start = get_insns ();
28184 int nbytes = 0, njumps = 0;
28185 int isjump = 0;
28186
28187   /* Look for all minimal intervals of instructions containing 4 jumps.
28188      The intervals are bounded by START and INSN.  NBYTES is the total
28189      size of instructions in the interval including INSN and not including
28190      START.  When NBYTES is smaller than 16, it is possible
28191      that the end of START and INSN ends up in the same 16-byte page.
28192 
28193      The smallest offset in the page at which INSN can start is the case where
28194      START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
28195      We add p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
28196      */
28197 for (insn = start; insn; insn = NEXT_INSN (insn))
28198 {
28199 int min_size;
28200
28201 if (LABEL_P (insn))
28202 {
28203 int align = label_to_alignment (insn);
28204 int max_skip = label_to_max_skip (insn);
28205
28206 if (max_skip > 15)
28207 max_skip = 15;
28208 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
28209 already in the current 16 byte page, because otherwise
28210 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
28211 bytes to reach 16 byte boundary. */
28212 if (align <= 0
28213 || (align <= 3 && max_skip != (1 << align) - 1))
28214 max_skip = 0;
28215 if (dump_file)
28216 fprintf (dump_file, "Label %i with max_skip %i\n",
28217 INSN_UID (insn), max_skip);
28218 if (max_skip)
28219 {
28220 while (nbytes + max_skip >= 16)
28221 {
28222 start = NEXT_INSN (start);
28223 if ((JUMP_P (start)
28224 && GET_CODE (PATTERN (start)) != ADDR_VEC
28225 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28226 || CALL_P (start))
28227 njumps--, isjump = 1;
28228 else
28229 isjump = 0;
28230 nbytes -= min_insn_size (start);
28231 }
28232 }
28233 continue;
28234 }
28235
28236 min_size = min_insn_size (insn);
28237 nbytes += min_size;
28238 if (dump_file)
28239 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
28240 INSN_UID (insn), min_size);
28241 if ((JUMP_P (insn)
28242 && GET_CODE (PATTERN (insn)) != ADDR_VEC
28243 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
28244 || CALL_P (insn))
28245 njumps++;
28246 else
28247 continue;
28248
28249 while (njumps > 3)
28250 {
28251 start = NEXT_INSN (start);
28252 if ((JUMP_P (start)
28253 && GET_CODE (PATTERN (start)) != ADDR_VEC
28254 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28255 || CALL_P (start))
28256 njumps--, isjump = 1;
28257 else
28258 isjump = 0;
28259 nbytes -= min_insn_size (start);
28260 }
28261 gcc_assert (njumps >= 0);
28262 if (dump_file)
28263 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
28264 INSN_UID (start), INSN_UID (insn), nbytes);
28265
28266 if (njumps == 3 && isjump && nbytes < 16)
28267 {
28268 int padsize = 15 - nbytes + min_insn_size (insn);
28269
28270 if (dump_file)
28271 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
28272 INSN_UID (insn), padsize);
28273 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
28274 }
28275 }
28276 }
28277 #endif
28278
28279 /* AMD Athlon works faster
28280    when RET is not the destination of a conditional jump or directly preceded
28281    by another jump instruction.  We avoid the penalty by inserting a NOP just
28282    before the RET instruction in such cases.  */
28283 static void
28284 ix86_pad_returns (void)
28285 {
28286 edge e;
28287 edge_iterator ei;
28288
28289 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28290 {
28291 basic_block bb = e->src;
28292 rtx ret = BB_END (bb);
28293 rtx prev;
28294 bool replace = false;
28295
28296 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
28297 || optimize_bb_for_size_p (bb))
28298 continue;
28299 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
28300 if (active_insn_p (prev) || LABEL_P (prev))
28301 break;
28302 if (prev && LABEL_P (prev))
28303 {
28304 edge e;
28305 edge_iterator ei;
28306
28307 FOR_EACH_EDGE (e, ei, bb->preds)
28308 if (EDGE_FREQUENCY (e) && e->src->index >= 0
28309 && !(e->flags & EDGE_FALLTHRU))
28310 replace = true;
28311 }
28312 if (!replace)
28313 {
28314 prev = prev_active_insn (ret);
28315 if (prev
28316 && ((JUMP_P (prev) && any_condjump_p (prev))
28317 || CALL_P (prev)))
28318 replace = true;
28319 	  /* Empty functions get a branch mispredict even when the jump destination
28320 	     is not visible to us.  */
28321 if (!prev && !optimize_function_for_size_p (cfun))
28322 replace = true;
28323 }
28324 if (replace)
28325 {
28326 emit_jump_insn_before (gen_return_internal_long (), ret);
28327 delete_insn (ret);
28328 }
28329 }
28330 }
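/* Illustrative note (not part of the original source): the replacement
   emitted above, gen_return_internal_long, is understood to expand to the
   two-byte "rep; ret" form of return on the affected AMD cores, which avoids
   the mispredict penalty of a single-byte RET that is a branch target or
   immediately follows another branch.  This is a reading of the pattern name
   and the comment above, not something verified against every i386.md
   variant.  */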
28331
28332 /* Count the minimum number of instructions in BB. Return 4 if the
28333 number of instructions >= 4. */
28334
28335 static int
28336 ix86_count_insn_bb (basic_block bb)
28337 {
28338 rtx insn;
28339 int insn_count = 0;
28340
28341 /* Count number of instructions in this block. Return 4 if the number
28342 of instructions >= 4. */
28343 FOR_BB_INSNS (bb, insn)
28344 {
28345 	  /* Only happens in exit blocks.  */
28346 if (JUMP_P (insn)
28347 && GET_CODE (PATTERN (insn)) == RETURN)
28348 break;
28349
28350 if (NONDEBUG_INSN_P (insn)
28351 && GET_CODE (PATTERN (insn)) != USE
28352 && GET_CODE (PATTERN (insn)) != CLOBBER)
28353 {
28354 insn_count++;
28355 if (insn_count >= 4)
28356 return insn_count;
28357 }
28358 }
28359
28360 return insn_count;
28361 }
28362
28363
28364 /* Count the minimum number of instructions in a code path from the function
28365    entry through BB.  Return 4 if the number of instructions >= 4.  */
28366
28367 static int
28368 ix86_count_insn (basic_block bb)
28369 {
28370 edge e;
28371 edge_iterator ei;
28372 int min_prev_count;
28373
28374 /* Only bother counting instructions along paths with no
28375 more than 2 basic blocks between entry and exit. Given
28376 that BB has an edge to exit, determine if a predecessor
28377 of BB has an edge from entry. If so, compute the number
28378 of instructions in the predecessor block. If there
28379 happen to be multiple such blocks, compute the minimum. */
28380 min_prev_count = 4;
28381 FOR_EACH_EDGE (e, ei, bb->preds)
28382 {
28383 edge prev_e;
28384 edge_iterator prev_ei;
28385
28386 if (e->src == ENTRY_BLOCK_PTR)
28387 {
28388 min_prev_count = 0;
28389 break;
28390 }
28391 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
28392 {
28393 if (prev_e->src == ENTRY_BLOCK_PTR)
28394 {
28395 int count = ix86_count_insn_bb (e->src);
28396 if (count < min_prev_count)
28397 min_prev_count = count;
28398 break;
28399 }
28400 }
28401 }
28402
28403 if (min_prev_count < 4)
28404 min_prev_count += ix86_count_insn_bb (bb);
28405
28406 return min_prev_count;
28407 }
28408
28409 /* Pad short function to 4 instructions.   */
28410
28411 static void
28412 ix86_pad_short_function (void)
28413 {
28414 edge e;
28415 edge_iterator ei;
28416
28417 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28418 {
28419 rtx ret = BB_END (e->src);
28420 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
28421 {
28422 int insn_count = ix86_count_insn (e->src);
28423
28424 /* Pad short function. */
28425 if (insn_count < 4)
28426 {
28427 rtx insn = ret;
28428
28429 /* Find epilogue. */
28430 while (insn
28431 && (!NOTE_P (insn)
28432 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
28433 insn = PREV_INSN (insn);
28434
28435 if (!insn)
28436 insn = ret;
28437
28438 /* Two NOPs are counted as one instruction. */
28439 insn_count = 2 * (4 - insn_count);
28440 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
28441 }
28442 }
28443 }
28444 }
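/* Illustrative note (not part of the original source): a worked example of
   the padding above.  For a function whose only real instruction is the
   return (insn_count == 1), the code computes 2 * (4 - 1) = 6, so six NOPs
   are emitted before the epilogue note; since two NOPs are counted as one
   instruction, the function then appears to contain the required four
   instructions.  */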
28445
28446 /* Implement machine specific optimizations. We implement padding of returns
28447 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
28448 static void
28449 ix86_reorg (void)
28450 {
28451 if (optimize && optimize_function_for_speed_p (cfun))
28452 {
28453 if (TARGET_PAD_SHORT_FUNCTION)
28454 ix86_pad_short_function ();
28455 else if (TARGET_PAD_RETURNS)
28456 ix86_pad_returns ();
28457 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28458 if (TARGET_FOUR_JUMP_LIMIT)
28459 ix86_avoid_jump_mispredicts ();
28460 #endif
28461 }
28462 }
28463
28464 /* Return nonzero when a QImode register that must be represented via a REX
28465    prefix is used.  */
28466 bool
28467 x86_extended_QIreg_mentioned_p (rtx insn)
28468 {
28469 int i;
28470 extract_insn_cached (insn);
28471 for (i = 0; i < recog_data.n_operands; i++)
28472 if (REG_P (recog_data.operand[i])
28473 && REGNO (recog_data.operand[i]) > BX_REG)
28474 return true;
28475 return false;
28476 }
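/* Illustrative note (not part of the original source): in the register
   numbering used here, only the four legacy registers up to BX_REG have
   byte forms that need no REX prefix, so the REGNO > BX_REG test above
   flags, e.g., SI/DI/BP/SP used as byte registers (SIL, DIL, BPL, SPL) and
   R8B-R15B, all of which require a REX prefix in 64-bit code.  This is an
   interpretation of the test, assuming the usual register ordering in
   i386.h.  */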
28477
28478 /* Return nonzero when P points to a register encoded via a REX prefix.
28479    Called via for_each_rtx.  */
28480 static int
28481 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
28482 {
28483 unsigned int regno;
28484 if (!REG_P (*p))
28485 return 0;
28486 regno = REGNO (*p);
28487 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
28488 }
28489
28490 /* Return true when INSN mentions register that must be encoded using REX
28491 prefix. */
28492 bool
28493 x86_extended_reg_mentioned_p (rtx insn)
28494 {
28495 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
28496 extended_reg_mentioned_1, NULL);
28497 }
28498
28499 /* If profitable, negate (without causing overflow) integer constant
28500 of mode MODE at location LOC. Return true in this case. */
28501 bool
28502 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
28503 {
28504 HOST_WIDE_INT val;
28505
28506 if (!CONST_INT_P (*loc))
28507 return false;
28508
28509 switch (mode)
28510 {
28511 case DImode:
28512 /* DImode x86_64 constants must fit in 32 bits. */
28513 gcc_assert (x86_64_immediate_operand (*loc, mode));
28514
28515 mode = SImode;
28516 break;
28517
28518 case SImode:
28519 case HImode:
28520 case QImode:
28521 break;
28522
28523 default:
28524 gcc_unreachable ();
28525 }
28526
28527 /* Avoid overflows. */
28528 if (mode_signbit_p (mode, *loc))
28529 return false;
28530
28531 val = INTVAL (*loc);
28532
28533 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
28534 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
28535 if ((val < 0 && val != -128)
28536 || val == 128)
28537 {
28538 *loc = GEN_INT (-val);
28539 return true;
28540 }
28541
28542 return false;
28543 }
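/* Illustrative note (not part of the original source): worked examples of
   the negation rule above, with the operation swap done by the caller as
   the "subl rather than addl" comment describes:

     val == -4:   negated to 4, so "subl $4, %eax" can be printed instead
                  of "addl $-4, %eax" (same length, just prettier).
     val == 128:  negated to -128, because -128 still fits in a
                  sign-extended 8-bit immediate while +128 does not.
     val == -128: left alone, for the same encoding reason.  */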
28544
28545 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
28546 optabs would emit if we didn't have TFmode patterns. */
28547
28548 void
28549 x86_emit_floatuns (rtx operands[2])
28550 {
28551 rtx neglab, donelab, i0, i1, f0, in, out;
28552 enum machine_mode mode, inmode;
28553
28554 inmode = GET_MODE (operands[1]);
28555 gcc_assert (inmode == SImode || inmode == DImode);
28556
28557 out = operands[0];
28558 in = force_reg (inmode, operands[1]);
28559 mode = GET_MODE (out);
28560 neglab = gen_label_rtx ();
28561 donelab = gen_label_rtx ();
28562 f0 = gen_reg_rtx (mode);
28563
28564 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
28565
28566 expand_float (out, in, 0);
28567
28568 emit_jump_insn (gen_jump (donelab));
28569 emit_barrier ();
28570
28571 emit_label (neglab);
28572
28573 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
28574 1, OPTAB_DIRECT);
28575 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
28576 1, OPTAB_DIRECT);
28577 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
28578
28579 expand_float (f0, i0, 0);
28580
28581 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
28582
28583 emit_label (donelab);
28584 }
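/* Illustrative sketch (not part of the original source): the scalar C
   equivalent of the sequence emitted above, shown for a DImode input and a
   double result (the helper name "floatuns_sketch" is made up):

     double floatuns_sketch (unsigned long x)
     {
       if ((long) x >= 0)                     /+ fast path: fits as signed +/
         return (double) (long) x;

       /+ Halve the value, folding the low bit back in so rounding
          information is not lost, convert, then double the result.  +/
       long i = (long) ((x >> 1) | (x & 1));
       double f = (double) i;
       return f + f;
     }

   (Nested comment delimiters are written as "/+ +/" above.)  */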
28585 \f
28586 /* AVX does not support 32-byte integer vector operations,
28587 thus the longest vector we are faced with is V16QImode. */
28588 #define MAX_VECT_LEN 16
28589
28590 struct expand_vec_perm_d
28591 {
28592 rtx target, op0, op1;
28593 unsigned char perm[MAX_VECT_LEN];
28594 enum machine_mode vmode;
28595 unsigned char nelt;
28596 bool testing_p;
28597 };
28598
28599 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
28600 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
28601
28602 /* Get a vector mode of the same size as the original but with elements
28603 twice as wide. This is only guaranteed to apply to integral vectors. */
28604
28605 static inline enum machine_mode
28606 get_mode_wider_vector (enum machine_mode o)
28607 {
28608 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
28609 enum machine_mode n = GET_MODE_WIDER_MODE (o);
28610 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
28611 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
28612 return n;
28613 }
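/* Illustrative note (not part of the original source): for example,
   V16QImode widens to V8HImode and V8HImode widens to V4SImode -- the same
   total size, half as many elements, each twice as wide.  */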
28614
28615 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28616 with all elements equal to VAR. Return true if successful. */
28617
28618 static bool
28619 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
28620 rtx target, rtx val)
28621 {
28622 bool ok;
28623
28624 switch (mode)
28625 {
28626 case V2SImode:
28627 case V2SFmode:
28628 if (!mmx_ok)
28629 return false;
28630 /* FALLTHRU */
28631
28632 case V4DFmode:
28633 case V4DImode:
28634 case V8SFmode:
28635 case V8SImode:
28636 case V2DFmode:
28637 case V2DImode:
28638 case V4SFmode:
28639 case V4SImode:
28640 {
28641 rtx insn, dup;
28642
28643 /* First attempt to recognize VAL as-is. */
28644 dup = gen_rtx_VEC_DUPLICATE (mode, val);
28645 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
28646 if (recog_memoized (insn) < 0)
28647 {
28648 rtx seq;
28649 /* If that fails, force VAL into a register. */
28650
28651 start_sequence ();
28652 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
28653 seq = get_insns ();
28654 end_sequence ();
28655 if (seq)
28656 emit_insn_before (seq, insn);
28657
28658 ok = recog_memoized (insn) >= 0;
28659 gcc_assert (ok);
28660 }
28661 }
28662 return true;
28663
28664 case V4HImode:
28665 if (!mmx_ok)
28666 return false;
28667 if (TARGET_SSE || TARGET_3DNOW_A)
28668 {
28669 rtx x;
28670
28671 val = gen_lowpart (SImode, val);
28672 x = gen_rtx_TRUNCATE (HImode, val);
28673 x = gen_rtx_VEC_DUPLICATE (mode, x);
28674 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28675 return true;
28676 }
28677 goto widen;
28678
28679 case V8QImode:
28680 if (!mmx_ok)
28681 return false;
28682 goto widen;
28683
28684 case V8HImode:
28685 if (TARGET_SSE2)
28686 {
28687 struct expand_vec_perm_d dperm;
28688 rtx tmp1, tmp2;
28689
28690 permute:
28691 memset (&dperm, 0, sizeof (dperm));
28692 dperm.target = target;
28693 dperm.vmode = mode;
28694 dperm.nelt = GET_MODE_NUNITS (mode);
28695 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
28696
28697 /* Extend to SImode using a paradoxical SUBREG. */
28698 tmp1 = gen_reg_rtx (SImode);
28699 emit_move_insn (tmp1, gen_lowpart (SImode, val));
28700
28701 /* Insert the SImode value as low element of a V4SImode vector. */
28702 tmp2 = gen_lowpart (V4SImode, dperm.op0);
28703 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
28704
28705 ok = (expand_vec_perm_1 (&dperm)
28706 || expand_vec_perm_broadcast_1 (&dperm));
28707 gcc_assert (ok);
28708 return ok;
28709 }
28710 goto widen;
28711
28712 case V16QImode:
28713 if (TARGET_SSE2)
28714 goto permute;
28715 goto widen;
28716
28717 widen:
28718 /* Replicate the value once into the next wider mode and recurse. */
28719 {
28720 enum machine_mode smode, wsmode, wvmode;
28721 rtx x;
28722
28723 smode = GET_MODE_INNER (mode);
28724 wvmode = get_mode_wider_vector (mode);
28725 wsmode = GET_MODE_INNER (wvmode);
28726
28727 val = convert_modes (wsmode, smode, val, true);
28728 x = expand_simple_binop (wsmode, ASHIFT, val,
28729 GEN_INT (GET_MODE_BITSIZE (smode)),
28730 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28731 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
28732
28733 x = gen_lowpart (wvmode, target);
28734 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
28735 gcc_assert (ok);
28736 return ok;
28737 }
28738
28739 case V16HImode:
28740 case V32QImode:
28741 {
28742 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
28743 rtx x = gen_reg_rtx (hvmode);
28744
28745 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
28746 gcc_assert (ok);
28747
28748 x = gen_rtx_VEC_CONCAT (mode, x, x);
28749 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28750 }
28751 return true;
28752
28753 default:
28754 return false;
28755 }
28756 }
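/* Illustrative sketch (not part of the original source): the "widen" case
   above replicates a narrow scalar into the next wider element and then
   recurses.  In scalar C terms, for a QImode value v on its way to an
   HImode broadcast:

     unsigned short w = ((unsigned short) (unsigned char) v << 8)
                        | (unsigned char) v;

   Broadcasting w one recursion level up (e.g. V8HImode for a V16QImode
   target without SSE2) then fills every byte of the vector with v.  */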
28757
28758 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28759 whose ONE_VAR element is VAR, and other elements are zero. Return true
28760 if successful. */
28761
28762 static bool
28763 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28764 rtx target, rtx var, int one_var)
28765 {
28766 enum machine_mode vsimode;
28767 rtx new_target;
28768 rtx x, tmp;
28769 bool use_vector_set = false;
28770
28771 switch (mode)
28772 {
28773 case V2DImode:
28774 /* For SSE4.1, we normally use vector set. But if the second
28775 element is zero and inter-unit moves are OK, we use movq
28776 instead. */
28777 use_vector_set = (TARGET_64BIT
28778 && TARGET_SSE4_1
28779 && !(TARGET_INTER_UNIT_MOVES
28780 && one_var == 0));
28781 break;
28782 case V16QImode:
28783 case V4SImode:
28784 case V4SFmode:
28785 use_vector_set = TARGET_SSE4_1;
28786 break;
28787 case V8HImode:
28788 use_vector_set = TARGET_SSE2;
28789 break;
28790 case V4HImode:
28791 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28792 break;
28793 case V32QImode:
28794 case V16HImode:
28795 case V8SImode:
28796 case V8SFmode:
28797 case V4DFmode:
28798 use_vector_set = TARGET_AVX;
28799 break;
28800 case V4DImode:
28801 /* Use ix86_expand_vector_set in 64bit mode only. */
28802 use_vector_set = TARGET_AVX && TARGET_64BIT;
28803 break;
28804 default:
28805 break;
28806 }
28807
28808 if (use_vector_set)
28809 {
28810 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28811 var = force_reg (GET_MODE_INNER (mode), var);
28812 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28813 return true;
28814 }
28815
28816 switch (mode)
28817 {
28818 case V2SFmode:
28819 case V2SImode:
28820 if (!mmx_ok)
28821 return false;
28822 /* FALLTHRU */
28823
28824 case V2DFmode:
28825 case V2DImode:
28826 if (one_var != 0)
28827 return false;
28828 var = force_reg (GET_MODE_INNER (mode), var);
28829 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28830 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28831 return true;
28832
28833 case V4SFmode:
28834 case V4SImode:
28835 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28836 new_target = gen_reg_rtx (mode);
28837 else
28838 new_target = target;
28839 var = force_reg (GET_MODE_INNER (mode), var);
28840 x = gen_rtx_VEC_DUPLICATE (mode, var);
28841 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28842 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28843 if (one_var != 0)
28844 {
28845 /* We need to shuffle the value to the correct position, so
28846 create a new pseudo to store the intermediate result. */
28847
28848 /* With SSE2, we can use the integer shuffle insns. */
28849 if (mode != V4SFmode && TARGET_SSE2)
28850 {
28851 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28852 const1_rtx,
28853 GEN_INT (one_var == 1 ? 0 : 1),
28854 GEN_INT (one_var == 2 ? 0 : 1),
28855 GEN_INT (one_var == 3 ? 0 : 1)));
28856 if (target != new_target)
28857 emit_move_insn (target, new_target);
28858 return true;
28859 }
28860
28861 /* Otherwise convert the intermediate result to V4SFmode and
28862 use the SSE1 shuffle instructions. */
28863 if (mode != V4SFmode)
28864 {
28865 tmp = gen_reg_rtx (V4SFmode);
28866 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28867 }
28868 else
28869 tmp = new_target;
28870
28871 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28872 const1_rtx,
28873 GEN_INT (one_var == 1 ? 0 : 1),
28874 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28875 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28876
28877 if (mode != V4SFmode)
28878 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28879 else if (tmp != target)
28880 emit_move_insn (target, tmp);
28881 }
28882 else if (target != new_target)
28883 emit_move_insn (target, new_target);
28884 return true;
28885
28886 case V8HImode:
28887 case V16QImode:
28888 vsimode = V4SImode;
28889 goto widen;
28890 case V4HImode:
28891 case V8QImode:
28892 if (!mmx_ok)
28893 return false;
28894 vsimode = V2SImode;
28895 goto widen;
28896 widen:
28897 if (one_var != 0)
28898 return false;
28899
28900 /* Zero extend the variable element to SImode and recurse. */
28901 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28902
28903 x = gen_reg_rtx (vsimode);
28904 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28905 var, one_var))
28906 gcc_unreachable ();
28907
28908 emit_move_insn (target, gen_lowpart (mode, x));
28909 return true;
28910
28911 default:
28912 return false;
28913 }
28914 }
28915
28916 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28917 consisting of the values in VALS. It is known that all elements
28918 except ONE_VAR are constants. Return true if successful. */
28919
28920 static bool
28921 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28922 rtx target, rtx vals, int one_var)
28923 {
28924 rtx var = XVECEXP (vals, 0, one_var);
28925 enum machine_mode wmode;
28926 rtx const_vec, x;
28927
28928 const_vec = copy_rtx (vals);
28929 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28930 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28931
28932 switch (mode)
28933 {
28934 case V2DFmode:
28935 case V2DImode:
28936 case V2SFmode:
28937 case V2SImode:
28938 /* For the two element vectors, it's just as easy to use
28939 the general case. */
28940 return false;
28941
28942 case V4DImode:
28943 /* Use ix86_expand_vector_set in 64bit mode only. */
28944 if (!TARGET_64BIT)
28945 return false;
28946 case V4DFmode:
28947 case V8SFmode:
28948 case V8SImode:
28949 case V16HImode:
28950 case V32QImode:
28951 case V4SFmode:
28952 case V4SImode:
28953 case V8HImode:
28954 case V4HImode:
28955 break;
28956
28957 case V16QImode:
28958 if (TARGET_SSE4_1)
28959 break;
28960 wmode = V8HImode;
28961 goto widen;
28962 case V8QImode:
28963 wmode = V4HImode;
28964 goto widen;
28965 widen:
28966 /* There's no way to set one QImode entry easily. Combine
28967 the variable value with its adjacent constant value, and
28968 promote to an HImode set. */
28969 x = XVECEXP (vals, 0, one_var ^ 1);
28970 if (one_var & 1)
28971 {
28972 var = convert_modes (HImode, QImode, var, true);
28973 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28974 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28975 x = GEN_INT (INTVAL (x) & 0xff);
28976 }
28977 else
28978 {
28979 var = convert_modes (HImode, QImode, var, true);
28980 x = gen_int_mode (INTVAL (x) << 8, HImode);
28981 }
28982 if (x != const0_rtx)
28983 var = expand_simple_binop (HImode, IOR, var, x, var,
28984 1, OPTAB_LIB_WIDEN);
28985
28986 x = gen_reg_rtx (wmode);
28987 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28988 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28989
28990 emit_move_insn (target, gen_lowpart (mode, x));
28991 return true;
28992
28993 default:
28994 return false;
28995 }
28996
28997 emit_move_insn (target, const_vec);
28998 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28999 return true;
29000 }
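/* Illustrative note (not part of the original source): a worked example of
   the QImode "widen" case above.  For a V16QImode vector without SSE4.1 and
   one_var == 5, the variable byte lands in the high half of HImode word 2
   (one_var >> 1): the variable value is shifted left by 8 and IORed with the
   low 8 bits of its constant neighbour (element 4), and the combined HImode
   value is inserted with ix86_expand_vector_set into the V8HImode view of
   the constant vector.  */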
29001
29002 /* A subroutine of ix86_expand_vector_init_general. Use vector
29003 concatenate to handle the most general case: all values variable,
29004 and none identical. */
29005
29006 static void
29007 ix86_expand_vector_init_concat (enum machine_mode mode,
29008 rtx target, rtx *ops, int n)
29009 {
29010 enum machine_mode cmode, hmode = VOIDmode;
29011 rtx first[8], second[4];
29012 rtvec v;
29013 int i, j;
29014
29015 switch (n)
29016 {
29017 case 2:
29018 switch (mode)
29019 {
29020 case V8SImode:
29021 cmode = V4SImode;
29022 break;
29023 case V8SFmode:
29024 cmode = V4SFmode;
29025 break;
29026 case V4DImode:
29027 cmode = V2DImode;
29028 break;
29029 case V4DFmode:
29030 cmode = V2DFmode;
29031 break;
29032 case V4SImode:
29033 cmode = V2SImode;
29034 break;
29035 case V4SFmode:
29036 cmode = V2SFmode;
29037 break;
29038 case V2DImode:
29039 cmode = DImode;
29040 break;
29041 case V2SImode:
29042 cmode = SImode;
29043 break;
29044 case V2DFmode:
29045 cmode = DFmode;
29046 break;
29047 case V2SFmode:
29048 cmode = SFmode;
29049 break;
29050 default:
29051 gcc_unreachable ();
29052 }
29053
29054 if (!register_operand (ops[1], cmode))
29055 ops[1] = force_reg (cmode, ops[1]);
29056 if (!register_operand (ops[0], cmode))
29057 ops[0] = force_reg (cmode, ops[0]);
29058 emit_insn (gen_rtx_SET (VOIDmode, target,
29059 gen_rtx_VEC_CONCAT (mode, ops[0],
29060 ops[1])));
29061 break;
29062
29063 case 4:
29064 switch (mode)
29065 {
29066 case V4DImode:
29067 cmode = V2DImode;
29068 break;
29069 case V4DFmode:
29070 cmode = V2DFmode;
29071 break;
29072 case V4SImode:
29073 cmode = V2SImode;
29074 break;
29075 case V4SFmode:
29076 cmode = V2SFmode;
29077 break;
29078 default:
29079 gcc_unreachable ();
29080 }
29081 goto half;
29082
29083 case 8:
29084 switch (mode)
29085 {
29086 case V8SImode:
29087 cmode = V2SImode;
29088 hmode = V4SImode;
29089 break;
29090 case V8SFmode:
29091 cmode = V2SFmode;
29092 hmode = V4SFmode;
29093 break;
29094 default:
29095 gcc_unreachable ();
29096 }
29097 goto half;
29098
29099 half:
29100 /* FIXME: We process inputs backward to help RA. PR 36222. */
29101 i = n - 1;
29102 j = (n >> 1) - 1;
29103 for (; i > 0; i -= 2, j--)
29104 {
29105 first[j] = gen_reg_rtx (cmode);
29106 v = gen_rtvec (2, ops[i - 1], ops[i]);
29107 ix86_expand_vector_init (false, first[j],
29108 gen_rtx_PARALLEL (cmode, v));
29109 }
29110
29111 n >>= 1;
29112 if (n > 2)
29113 {
29114 gcc_assert (hmode != VOIDmode);
29115 for (i = j = 0; i < n; i += 2, j++)
29116 {
29117 second[j] = gen_reg_rtx (hmode);
29118 ix86_expand_vector_init_concat (hmode, second [j],
29119 &first [i], 2);
29120 }
29121 n >>= 1;
29122 ix86_expand_vector_init_concat (mode, target, second, n);
29123 }
29124 else
29125 ix86_expand_vector_init_concat (mode, target, first, n);
29126 break;
29127
29128 default:
29129 gcc_unreachable ();
29130 }
29131 }
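/* Illustrative note (not part of the original source): for n == 8 and
   V8SFmode, the recursion above first builds four V2SFmode pairs from the
   eight scalar operands (processed backward, see the FIXME), then
   concatenates those pairs into two V4SFmode halves, and finally
   concatenates the halves into the V8SFmode target.  */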
29132
29133 /* A subroutine of ix86_expand_vector_init_general. Use vector
29134 interleave to handle the most general case: all values variable,
29135 and none identical. */
29136
29137 static void
29138 ix86_expand_vector_init_interleave (enum machine_mode mode,
29139 rtx target, rtx *ops, int n)
29140 {
29141 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
29142 int i, j;
29143 rtx op0, op1;
29144 rtx (*gen_load_even) (rtx, rtx, rtx);
29145 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
29146 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
29147
29148 switch (mode)
29149 {
29150 case V8HImode:
29151 gen_load_even = gen_vec_setv8hi;
29152 gen_interleave_first_low = gen_vec_interleave_lowv4si;
29153 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29154 inner_mode = HImode;
29155 first_imode = V4SImode;
29156 second_imode = V2DImode;
29157 third_imode = VOIDmode;
29158 break;
29159 case V16QImode:
29160 gen_load_even = gen_vec_setv16qi;
29161 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
29162 gen_interleave_second_low = gen_vec_interleave_lowv4si;
29163 inner_mode = QImode;
29164 first_imode = V8HImode;
29165 second_imode = V4SImode;
29166 third_imode = V2DImode;
29167 break;
29168 default:
29169 gcc_unreachable ();
29170 }
29171
29172 for (i = 0; i < n; i++)
29173 {
29174       /* Extend the odd element to SImode using a paradoxical SUBREG.  */
29175 op0 = gen_reg_rtx (SImode);
29176 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
29177
29178 /* Insert the SImode value as low element of V4SImode vector. */
29179 op1 = gen_reg_rtx (V4SImode);
29180 op0 = gen_rtx_VEC_MERGE (V4SImode,
29181 gen_rtx_VEC_DUPLICATE (V4SImode,
29182 op0),
29183 CONST0_RTX (V4SImode),
29184 const1_rtx);
29185 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
29186
29187       /* Cast the V4SImode vector back to a vector in the original mode.  */
29188 op0 = gen_reg_rtx (mode);
29189 emit_move_insn (op0, gen_lowpart (mode, op1));
29190
29191       /* Load even elements into the second position.  */
29192 emit_insn (gen_load_even (op0,
29193 force_reg (inner_mode,
29194 ops [i + i + 1]),
29195 const1_rtx));
29196
29197 /* Cast vector to FIRST_IMODE vector. */
29198 ops[i] = gen_reg_rtx (first_imode);
29199 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
29200 }
29201
29202 /* Interleave low FIRST_IMODE vectors. */
29203 for (i = j = 0; i < n; i += 2, j++)
29204 {
29205 op0 = gen_reg_rtx (first_imode);
29206 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
29207
29208 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
29209 ops[j] = gen_reg_rtx (second_imode);
29210 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
29211 }
29212
29213 /* Interleave low SECOND_IMODE vectors. */
29214 switch (second_imode)
29215 {
29216 case V4SImode:
29217 for (i = j = 0; i < n / 2; i += 2, j++)
29218 {
29219 op0 = gen_reg_rtx (second_imode);
29220 emit_insn (gen_interleave_second_low (op0, ops[i],
29221 ops[i + 1]));
29222
29223 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
29224 vector. */
29225 ops[j] = gen_reg_rtx (third_imode);
29226 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
29227 }
29228 second_imode = V2DImode;
29229 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29230 /* FALLTHRU */
29231
29232 case V2DImode:
29233 op0 = gen_reg_rtx (second_imode);
29234 emit_insn (gen_interleave_second_low (op0, ops[0],
29235 ops[1]));
29236
29237       /* Cast the SECOND_IMODE vector back to a vector in the original
29238 	 mode.  */
29239 emit_insn (gen_rtx_SET (VOIDmode, target,
29240 gen_lowpart (mode, op0)));
29241 break;
29242
29243 default:
29244 gcc_unreachable ();
29245 }
29246 }
29247
29248 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
29249 all values variable, and none identical. */
29250
29251 static void
29252 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
29253 rtx target, rtx vals)
29254 {
29255 rtx ops[32], op0, op1;
29256 enum machine_mode half_mode = VOIDmode;
29257 int n, i;
29258
29259 switch (mode)
29260 {
29261 case V2SFmode:
29262 case V2SImode:
29263 if (!mmx_ok && !TARGET_SSE)
29264 break;
29265 /* FALLTHRU */
29266
29267 case V8SFmode:
29268 case V8SImode:
29269 case V4DFmode:
29270 case V4DImode:
29271 case V4SFmode:
29272 case V4SImode:
29273 case V2DFmode:
29274 case V2DImode:
29275 n = GET_MODE_NUNITS (mode);
29276 for (i = 0; i < n; i++)
29277 ops[i] = XVECEXP (vals, 0, i);
29278 ix86_expand_vector_init_concat (mode, target, ops, n);
29279 return;
29280
29281 case V32QImode:
29282 half_mode = V16QImode;
29283 goto half;
29284
29285 case V16HImode:
29286 half_mode = V8HImode;
29287 goto half;
29288
29289 half:
29290 n = GET_MODE_NUNITS (mode);
29291 for (i = 0; i < n; i++)
29292 ops[i] = XVECEXP (vals, 0, i);
29293 op0 = gen_reg_rtx (half_mode);
29294 op1 = gen_reg_rtx (half_mode);
29295 ix86_expand_vector_init_interleave (half_mode, op0, ops,
29296 n >> 2);
29297 ix86_expand_vector_init_interleave (half_mode, op1,
29298 &ops [n >> 1], n >> 2);
29299 emit_insn (gen_rtx_SET (VOIDmode, target,
29300 gen_rtx_VEC_CONCAT (mode, op0, op1)));
29301 return;
29302
29303 case V16QImode:
29304 if (!TARGET_SSE4_1)
29305 break;
29306 /* FALLTHRU */
29307
29308 case V8HImode:
29309 if (!TARGET_SSE2)
29310 break;
29311
29312 /* Don't use ix86_expand_vector_init_interleave if we can't
29313 move from GPR to SSE register directly. */
29314 if (!TARGET_INTER_UNIT_MOVES)
29315 break;
29316
29317 n = GET_MODE_NUNITS (mode);
29318 for (i = 0; i < n; i++)
29319 ops[i] = XVECEXP (vals, 0, i);
29320 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
29321 return;
29322
29323 case V4HImode:
29324 case V8QImode:
29325 break;
29326
29327 default:
29328 gcc_unreachable ();
29329 }
29330
29331 {
29332 int i, j, n_elts, n_words, n_elt_per_word;
29333 enum machine_mode inner_mode;
29334 rtx words[4], shift;
29335
29336 inner_mode = GET_MODE_INNER (mode);
29337 n_elts = GET_MODE_NUNITS (mode);
29338 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
29339 n_elt_per_word = n_elts / n_words;
29340 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
29341
29342 for (i = 0; i < n_words; ++i)
29343 {
29344 rtx word = NULL_RTX;
29345
29346 for (j = 0; j < n_elt_per_word; ++j)
29347 {
29348 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
29349 elt = convert_modes (word_mode, inner_mode, elt, true);
29350
29351 if (j == 0)
29352 word = elt;
29353 else
29354 {
29355 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
29356 word, 1, OPTAB_LIB_WIDEN);
29357 word = expand_simple_binop (word_mode, IOR, word, elt,
29358 word, 1, OPTAB_LIB_WIDEN);
29359 }
29360 }
29361
29362 words[i] = word;
29363 }
29364
29365 if (n_words == 1)
29366 emit_move_insn (target, gen_lowpart (mode, words[0]));
29367 else if (n_words == 2)
29368 {
29369 rtx tmp = gen_reg_rtx (mode);
29370 emit_clobber (tmp);
29371 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
29372 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
29373 emit_move_insn (target, tmp);
29374 }
29375 else if (n_words == 4)
29376 {
29377 rtx tmp = gen_reg_rtx (V4SImode);
29378 gcc_assert (word_mode == SImode);
29379 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
29380 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
29381 emit_move_insn (target, gen_lowpart (mode, tmp));
29382 }
29383 else
29384 gcc_unreachable ();
29385 }
29386 }
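/* Illustrative sketch (not part of the original source): the generic
   word-packing fallback at the end of the function above, in scalar C terms
   for a V4HImode vector {a, b, c, d} on a 32-bit target (word_mode ==
   SImode, two elements per word):

     unsigned int w0 = ((unsigned int) (unsigned short) b << 16)
                       | (unsigned short) a;
     unsigned int w1 = ((unsigned int) (unsigned short) d << 16)
                       | (unsigned short) c;

   The two words are then moved into the low and high parts of the vector
   register (the n_words == 2 branch).  */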
29387
29388 /* Initialize vector TARGET via VALS. Suppress the use of MMX
29389 instructions unless MMX_OK is true. */
29390
29391 void
29392 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
29393 {
29394 enum machine_mode mode = GET_MODE (target);
29395 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29396 int n_elts = GET_MODE_NUNITS (mode);
29397 int n_var = 0, one_var = -1;
29398 bool all_same = true, all_const_zero = true;
29399 int i;
29400 rtx x;
29401
29402 for (i = 0; i < n_elts; ++i)
29403 {
29404 x = XVECEXP (vals, 0, i);
29405 if (!(CONST_INT_P (x)
29406 || GET_CODE (x) == CONST_DOUBLE
29407 || GET_CODE (x) == CONST_FIXED))
29408 n_var++, one_var = i;
29409 else if (x != CONST0_RTX (inner_mode))
29410 all_const_zero = false;
29411 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
29412 all_same = false;
29413 }
29414
29415 /* Constants are best loaded from the constant pool. */
29416 if (n_var == 0)
29417 {
29418 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
29419 return;
29420 }
29421
29422 /* If all values are identical, broadcast the value. */
29423 if (all_same
29424 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
29425 XVECEXP (vals, 0, 0)))
29426 return;
29427
29428 /* Values where only one field is non-constant are best loaded from
29429 the pool and overwritten via move later. */
29430 if (n_var == 1)
29431 {
29432 if (all_const_zero
29433 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
29434 XVECEXP (vals, 0, one_var),
29435 one_var))
29436 return;
29437
29438 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
29439 return;
29440 }
29441
29442 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
29443 }
29444
29445 void
29446 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
29447 {
29448 enum machine_mode mode = GET_MODE (target);
29449 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29450 enum machine_mode half_mode;
29451 bool use_vec_merge = false;
29452 rtx tmp;
29453 static rtx (*gen_extract[6][2]) (rtx, rtx)
29454 = {
29455 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
29456 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
29457 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
29458 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
29459 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
29460 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
29461 };
29462 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
29463 = {
29464 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
29465 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
29466 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
29467 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
29468 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
29469 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
29470 };
29471 int i, j, n;
29472
29473 switch (mode)
29474 {
29475 case V2SFmode:
29476 case V2SImode:
29477 if (mmx_ok)
29478 {
29479 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
29480 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
29481 if (elt == 0)
29482 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
29483 else
29484 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
29485 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29486 return;
29487 }
29488 break;
29489
29490 case V2DImode:
29491 use_vec_merge = TARGET_SSE4_1;
29492 if (use_vec_merge)
29493 break;
29494
29495 case V2DFmode:
29496 {
29497 rtx op0, op1;
29498
29499 /* For the two element vectors, we implement a VEC_CONCAT with
29500 the extraction of the other element. */
29501
29502 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
29503 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
29504
29505 if (elt == 0)
29506 op0 = val, op1 = tmp;
29507 else
29508 op0 = tmp, op1 = val;
29509
29510 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
29511 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29512 }
29513 return;
29514
29515 case V4SFmode:
29516 use_vec_merge = TARGET_SSE4_1;
29517 if (use_vec_merge)
29518 break;
29519
29520 switch (elt)
29521 {
29522 case 0:
29523 use_vec_merge = true;
29524 break;
29525
29526 case 1:
29527 /* tmp = target = A B C D */
29528 tmp = copy_to_reg (target);
29529 /* target = A A B B */
29530 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
29531 /* target = X A B B */
29532 ix86_expand_vector_set (false, target, val, 0);
29533 /* target = A X C D */
29534 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29535 const1_rtx, const0_rtx,
29536 GEN_INT (2+4), GEN_INT (3+4)));
29537 return;
29538
29539 case 2:
29540 /* tmp = target = A B C D */
29541 tmp = copy_to_reg (target);
29542 /* tmp = X B C D */
29543 ix86_expand_vector_set (false, tmp, val, 0);
29544 /* target = A B X D */
29545 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29546 const0_rtx, const1_rtx,
29547 GEN_INT (0+4), GEN_INT (3+4)));
29548 return;
29549
29550 case 3:
29551 /* tmp = target = A B C D */
29552 tmp = copy_to_reg (target);
29553 /* tmp = X B C D */
29554 ix86_expand_vector_set (false, tmp, val, 0);
29555 	  /* target = A B C X */
29556 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29557 const0_rtx, const1_rtx,
29558 GEN_INT (2+4), GEN_INT (0+4)));
29559 return;
29560
29561 default:
29562 gcc_unreachable ();
29563 }
29564 break;
29565
29566 case V4SImode:
29567 use_vec_merge = TARGET_SSE4_1;
29568 if (use_vec_merge)
29569 break;
29570
29571 /* Element 0 handled by vec_merge below. */
29572 if (elt == 0)
29573 {
29574 use_vec_merge = true;
29575 break;
29576 }
29577
29578 if (TARGET_SSE2)
29579 {
29580 /* With SSE2, use integer shuffles to swap element 0 and ELT,
29581 store into element 0, then shuffle them back. */
29582
29583 rtx order[4];
29584
29585 order[0] = GEN_INT (elt);
29586 order[1] = const1_rtx;
29587 order[2] = const2_rtx;
29588 order[3] = GEN_INT (3);
29589 order[elt] = const0_rtx;
29590
29591 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29592 order[1], order[2], order[3]));
29593
29594 ix86_expand_vector_set (false, target, val, 0);
29595
29596 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29597 order[1], order[2], order[3]));
29598 }
29599 else
29600 {
29601 /* For SSE1, we have to reuse the V4SF code. */
29602 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
29603 gen_lowpart (SFmode, val), elt);
29604 }
29605 return;
29606
29607 case V8HImode:
29608 use_vec_merge = TARGET_SSE2;
29609 break;
29610 case V4HImode:
29611 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29612 break;
29613
29614 case V16QImode:
29615 use_vec_merge = TARGET_SSE4_1;
29616 break;
29617
29618 case V8QImode:
29619 break;
29620
29621 case V32QImode:
29622 half_mode = V16QImode;
29623 j = 0;
29624 n = 16;
29625 goto half;
29626
29627 case V16HImode:
29628 half_mode = V8HImode;
29629 j = 1;
29630 n = 8;
29631 goto half;
29632
29633 case V8SImode:
29634 half_mode = V4SImode;
29635 j = 2;
29636 n = 4;
29637 goto half;
29638
29639 case V4DImode:
29640 half_mode = V2DImode;
29641 j = 3;
29642 n = 2;
29643 goto half;
29644
29645 case V8SFmode:
29646 half_mode = V4SFmode;
29647 j = 4;
29648 n = 4;
29649 goto half;
29650
29651 case V4DFmode:
29652 half_mode = V2DFmode;
29653 j = 5;
29654 n = 2;
29655 goto half;
29656
29657 half:
29658 /* Compute offset. */
29659 i = elt / n;
29660 elt %= n;
29661
29662 gcc_assert (i <= 1);
29663
29664 /* Extract the half. */
29665 tmp = gen_reg_rtx (half_mode);
29666 emit_insn (gen_extract[j][i] (tmp, target));
29667
29668 /* Put val in tmp at elt. */
29669 ix86_expand_vector_set (false, tmp, val, elt);
29670
29671 /* Put it back. */
29672 emit_insn (gen_insert[j][i] (target, target, tmp));
29673 return;
29674
29675 default:
29676 break;
29677 }
29678
29679 if (use_vec_merge)
29680 {
29681 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
29682 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
29683 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29684 }
29685 else
29686 {
29687 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29688
29689 emit_move_insn (mem, target);
29690
29691 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29692 emit_move_insn (tmp, val);
29693
29694 emit_move_insn (target, mem);
29695 }
29696 }
29697
29698 void
29699 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
29700 {
29701 enum machine_mode mode = GET_MODE (vec);
29702 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29703 bool use_vec_extr = false;
29704 rtx tmp;
29705
29706 switch (mode)
29707 {
29708 case V2SImode:
29709 case V2SFmode:
29710 if (!mmx_ok)
29711 break;
29712 /* FALLTHRU */
29713
29714 case V2DFmode:
29715 case V2DImode:
29716 use_vec_extr = true;
29717 break;
29718
29719 case V4SFmode:
29720 use_vec_extr = TARGET_SSE4_1;
29721 if (use_vec_extr)
29722 break;
29723
29724 switch (elt)
29725 {
29726 case 0:
29727 tmp = vec;
29728 break;
29729
29730 case 1:
29731 case 3:
29732 tmp = gen_reg_rtx (mode);
29733 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
29734 GEN_INT (elt), GEN_INT (elt),
29735 GEN_INT (elt+4), GEN_INT (elt+4)));
29736 break;
29737
29738 case 2:
29739 tmp = gen_reg_rtx (mode);
29740 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
29741 break;
29742
29743 default:
29744 gcc_unreachable ();
29745 }
29746 vec = tmp;
29747 use_vec_extr = true;
29748 elt = 0;
29749 break;
29750
29751 case V4SImode:
29752 use_vec_extr = TARGET_SSE4_1;
29753 if (use_vec_extr)
29754 break;
29755
29756 if (TARGET_SSE2)
29757 {
29758 switch (elt)
29759 {
29760 case 0:
29761 tmp = vec;
29762 break;
29763
29764 case 1:
29765 case 3:
29766 tmp = gen_reg_rtx (mode);
29767 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29768 GEN_INT (elt), GEN_INT (elt),
29769 GEN_INT (elt), GEN_INT (elt)));
29770 break;
29771
29772 case 2:
29773 tmp = gen_reg_rtx (mode);
29774 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
29775 break;
29776
29777 default:
29778 gcc_unreachable ();
29779 }
29780 vec = tmp;
29781 use_vec_extr = true;
29782 elt = 0;
29783 }
29784 else
29785 {
29786 /* For SSE1, we have to reuse the V4SF code. */
29787 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29788 gen_lowpart (V4SFmode, vec), elt);
29789 return;
29790 }
29791 break;
29792
29793 case V8HImode:
29794 use_vec_extr = TARGET_SSE2;
29795 break;
29796 case V4HImode:
29797 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29798 break;
29799
29800 case V16QImode:
29801 use_vec_extr = TARGET_SSE4_1;
29802 break;
29803
29804 case V8QImode:
29805 /* ??? Could extract the appropriate HImode element and shift. */
29806 default:
29807 break;
29808 }
29809
29810 if (use_vec_extr)
29811 {
29812 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29813 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29814
29815 /* Let the rtl optimizers know about the zero extension performed. */
29816 if (inner_mode == QImode || inner_mode == HImode)
29817 {
29818 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29819 target = gen_lowpart (SImode, target);
29820 }
29821
29822 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29823 }
29824 else
29825 {
29826 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29827
29828 emit_move_insn (mem, vec);
29829
29830 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29831 emit_move_insn (target, tmp);
29832 }
29833 }
29834
29835 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29836 pattern to reduce; DEST is the destination; IN is the input vector. */
29837
29838 void
29839 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29840 {
29841 rtx tmp1, tmp2, tmp3;
29842
29843 tmp1 = gen_reg_rtx (V4SFmode);
29844 tmp2 = gen_reg_rtx (V4SFmode);
29845 tmp3 = gen_reg_rtx (V4SFmode);
29846
29847 emit_insn (gen_sse_movhlps (tmp1, in, in));
29848 emit_insn (fn (tmp2, tmp1, in));
29849
29850 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29851 const1_rtx, const1_rtx,
29852 GEN_INT (1+4), GEN_INT (1+4)));
29853 emit_insn (fn (dest, tmp2, tmp3));
29854 }
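/* Illustrative note (not part of the original source): for FN = addition and
   IN = {a, b, c, d} the sequence above computes

     tmp1 = movhlps (in, in)         = { c,   d,   c,   d   }
     tmp2 = fn (tmp1, in)            = { a+c, b+d, ... }
     tmp3 = shufps (tmp2, element 1) = { b+d, b+d, b+d, b+d }
     dest = fn (tmp2, tmp3)

   so the full reduction a+b+c+d ends up in element 0 of DEST.  */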
29855 \f
29856 /* Target hook for scalar_mode_supported_p. */
29857 static bool
29858 ix86_scalar_mode_supported_p (enum machine_mode mode)
29859 {
29860 if (DECIMAL_FLOAT_MODE_P (mode))
29861 return default_decimal_float_supported_p ();
29862 else if (mode == TFmode)
29863 return true;
29864 else
29865 return default_scalar_mode_supported_p (mode);
29866 }
29867
29868 /* Implements target hook vector_mode_supported_p. */
29869 static bool
29870 ix86_vector_mode_supported_p (enum machine_mode mode)
29871 {
29872 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29873 return true;
29874 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29875 return true;
29876 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29877 return true;
29878 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29879 return true;
29880 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29881 return true;
29882 return false;
29883 }
29884
29885 /* Target hook for c_mode_for_suffix. */
29886 static enum machine_mode
29887 ix86_c_mode_for_suffix (char suffix)
29888 {
29889 if (suffix == 'q')
29890 return TFmode;
29891 if (suffix == 'w')
29892 return XFmode;
29893
29894 return VOIDmode;
29895 }
29896
29897 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29898
29899 We do this in the new i386 backend to maintain source compatibility
29900 with the old cc0-based compiler. */
29901
29902 static tree
29903 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29904 tree inputs ATTRIBUTE_UNUSED,
29905 tree clobbers)
29906 {
29907 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29908 clobbers);
29909 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29910 clobbers);
29911 return clobbers;
29912 }
29913
29914 /* Implements the target hook targetm.asm.encode_section_info.  This
29915    is not used by NetWare.  */
29916
29917 static void ATTRIBUTE_UNUSED
29918 ix86_encode_section_info (tree decl, rtx rtl, int first)
29919 {
29920 default_encode_section_info (decl, rtl, first);
29921
29922 if (TREE_CODE (decl) == VAR_DECL
29923 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29924 && ix86_in_large_data_p (decl))
29925 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29926 }
29927
29928 /* Worker function for REVERSE_CONDITION. */
29929
29930 enum rtx_code
29931 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29932 {
29933 return (mode != CCFPmode && mode != CCFPUmode
29934 ? reverse_condition (code)
29935 : reverse_condition_maybe_unordered (code));
29936 }
29937
29938 /* Output code to perform an x87 FP register move, from OPERANDS[1]
29939 to OPERANDS[0]. */
29940
29941 const char *
29942 output_387_reg_move (rtx insn, rtx *operands)
29943 {
29944 if (REG_P (operands[0]))
29945 {
29946 if (REG_P (operands[1])
29947 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29948 {
29949 if (REGNO (operands[0]) == FIRST_STACK_REG)
29950 return output_387_ffreep (operands, 0);
29951 return "fstp\t%y0";
29952 }
29953 if (STACK_TOP_P (operands[0]))
29954 return "fld%Z1\t%y1";
29955 return "fst\t%y0";
29956 }
29957 else if (MEM_P (operands[0]))
29958 {
29959 gcc_assert (REG_P (operands[1]));
29960 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29961 return "fstp%Z0\t%y0";
29962 else
29963 {
29964 /* There is no non-popping store to memory for XFmode.
29965 So if we need one, follow the store with a load. */
29966 if (GET_MODE (operands[0]) == XFmode)
29967 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29968 else
29969 return "fst%Z0\t%y0";
29970 }
29971 }
29972 else
29973 gcc_unreachable();
29974 }
29975
29976 /* Output code to perform a conditional jump to LABEL, if C2 flag in
29977 FP status register is set. */
29978
29979 void
29980 ix86_emit_fp_unordered_jump (rtx label)
29981 {
29982 rtx reg = gen_reg_rtx (HImode);
29983 rtx temp;
29984
29985 emit_insn (gen_x86_fnstsw_1 (reg));
29986
29987 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29988 {
29989 emit_insn (gen_x86_sahf_1 (reg));
29990
29991 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29992 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29993 }
29994 else
29995 {
29996 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29997
29998 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29999 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
30000 }
30001
30002 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
30003 gen_rtx_LABEL_REF (VOIDmode, label),
30004 pc_rtx);
30005 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
30006
30007 emit_jump_insn (temp);
30008 predict_jump (REG_BR_PROB_BASE * 10 / 100);
30009 }
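/* Illustrative note (not part of the original source): after fnstsw the x87
   condition bit C2 sits in bit 10 of the status word, i.e. bit 2 of the high
   byte, which is why the non-SAHF path above tests the high byte against
   0x04.  The SAHF path instead copies that high byte into EFLAGS and
   branches on the resulting unordered condition.  */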
30010
30011 /* Output code to perform a log1p XFmode calculation. */
30012
30013 void ix86_emit_i387_log1p (rtx op0, rtx op1)
30014 {
30015 rtx label1 = gen_label_rtx ();
30016 rtx label2 = gen_label_rtx ();
30017
30018 rtx tmp = gen_reg_rtx (XFmode);
30019 rtx tmp2 = gen_reg_rtx (XFmode);
30020 rtx test;
30021
30022 emit_insn (gen_absxf2 (tmp, op1));
30023 test = gen_rtx_GE (VOIDmode, tmp,
30024 CONST_DOUBLE_FROM_REAL_VALUE (
30025 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
30026 XFmode));
30027 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
30028
30029 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
30030 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
30031 emit_jump (label2);
30032
30033 emit_label (label1);
30034 emit_move_insn (tmp, CONST1_RTX (XFmode));
30035 emit_insn (gen_addxf3 (tmp, op1, tmp));
30036 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
30037 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
30038
30039 emit_label (label2);
30040 }
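/* Illustrative sketch (not part of the original source): in scalar terms the
   function above computes log1p (op1) as

     if (|op1| >= 0.29289...)    threshold, approximately 1 - sqrt(2)/2
       op0 = ln2 * log2 (1 + op1);    via fldln2; fyl2x   (label1 path)
     else
       op0 = ln2 * log2 (op1 + 1);    via fldln2; fyl2xp1 (fall-through)

   Both branches are the same formula mathematically; fyl2xp1 takes op1
   directly and is the more accurate choice near 0, while its documented
   input range is limited, hence the threshold test.  */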
30041
30042 /* Output code to perform a Newton-Raphson approximation of a single precision
30043    floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
30044
30045 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
30046 {
30047 rtx x0, x1, e0, e1, two;
30048
30049 x0 = gen_reg_rtx (mode);
30050 e0 = gen_reg_rtx (mode);
30051 e1 = gen_reg_rtx (mode);
30052 x1 = gen_reg_rtx (mode);
30053
30054 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
30055
30056 if (VECTOR_MODE_P (mode))
30057 two = ix86_build_const_vector (SFmode, true, two);
30058
30059 two = force_reg (mode, two);
30060
30061 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
30062
30063 /* x0 = rcp(b) estimate */
30064 emit_insn (gen_rtx_SET (VOIDmode, x0,
30065 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
30066 UNSPEC_RCP)));
30067 /* e0 = x0 * a */
30068 emit_insn (gen_rtx_SET (VOIDmode, e0,
30069 gen_rtx_MULT (mode, x0, a)));
30070 /* e1 = x0 * b */
30071 emit_insn (gen_rtx_SET (VOIDmode, e1,
30072 gen_rtx_MULT (mode, x0, b)));
30073 /* x1 = 2. - e1 */
30074 emit_insn (gen_rtx_SET (VOIDmode, x1,
30075 gen_rtx_MINUS (mode, two, e1)));
30076 /* res = e0 * x1 */
30077 emit_insn (gen_rtx_SET (VOIDmode, res,
30078 gen_rtx_MULT (mode, e0, x1)));
30079 }
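/* Illustrative sketch (not part of the original source): the scalar C
   equivalent of the sequence above, where rcp () stands for the hardware
   RCPSS/RCPPS reciprocal estimate (an assumed helper, not a real function):

     float swdiv_sketch (float a, float b)
     {
       float x0 = rcp (b);          /+ ~12-bit reciprocal estimate        +/
       float e0 = x0 * a;           /+ a * rcp(b)                         +/
       float e1 = x0 * b;           /+ b * rcp(b), close to 1.0           +/
       float x1 = 2.0f - e1;        /+ one Newton-Raphson correction term +/
       return e0 * x1;              /+ ~= a / b                           +/
     }

   (Nested comment delimiters are written as "/+ +/" above.)  */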
30080
30081 /* Output code to perform a Newton-Raphson approximation of a
30082    single precision floating point [reciprocal] square root.  */
30083
30084 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
30085 bool recip)
30086 {
30087 rtx x0, e0, e1, e2, e3, mthree, mhalf;
30088 REAL_VALUE_TYPE r;
30089
30090 x0 = gen_reg_rtx (mode);
30091 e0 = gen_reg_rtx (mode);
30092 e1 = gen_reg_rtx (mode);
30093 e2 = gen_reg_rtx (mode);
30094 e3 = gen_reg_rtx (mode);
30095
30096 real_from_integer (&r, VOIDmode, -3, -1, 0);
30097 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30098
30099 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
30100 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30101
30102 if (VECTOR_MODE_P (mode))
30103 {
30104 mthree = ix86_build_const_vector (SFmode, true, mthree);
30105 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
30106 }
30107
30108 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
30109 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
30110
30111 /* x0 = rsqrt(a) estimate */
30112 emit_insn (gen_rtx_SET (VOIDmode, x0,
30113 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
30114 UNSPEC_RSQRT)));
30115
30116   /* If a == 0.0, mask out the infinite rsqrt (0.0) estimate so that
	 sqrt (0.0) yields 0.0 rather than NaN.  */
30117 if (!recip)
30118 {
30119 rtx zero, mask;
30120
30121 zero = gen_reg_rtx (mode);
30122 mask = gen_reg_rtx (mode);
30123
30124 zero = force_reg (mode, CONST0_RTX(mode));
30125 emit_insn (gen_rtx_SET (VOIDmode, mask,
30126 gen_rtx_NE (mode, zero, a)));
30127
30128 emit_insn (gen_rtx_SET (VOIDmode, x0,
30129 gen_rtx_AND (mode, x0, mask)));
30130 }
30131
30132 /* e0 = x0 * a */
30133 emit_insn (gen_rtx_SET (VOIDmode, e0,
30134 gen_rtx_MULT (mode, x0, a)));
30135 /* e1 = e0 * x0 */
30136 emit_insn (gen_rtx_SET (VOIDmode, e1,
30137 gen_rtx_MULT (mode, e0, x0)));
30138
30139 /* e2 = e1 - 3. */
30140 mthree = force_reg (mode, mthree);
30141 emit_insn (gen_rtx_SET (VOIDmode, e2,
30142 gen_rtx_PLUS (mode, e1, mthree)));
30143
30144 mhalf = force_reg (mode, mhalf);
30145 if (recip)
30146 /* e3 = -.5 * x0 */
30147 emit_insn (gen_rtx_SET (VOIDmode, e3,
30148 gen_rtx_MULT (mode, x0, mhalf)));
30149 else
30150 /* e3 = -.5 * e0 */
30151 emit_insn (gen_rtx_SET (VOIDmode, e3,
30152 gen_rtx_MULT (mode, e0, mhalf)));
30153 /* ret = e2 * e3 */
30154 emit_insn (gen_rtx_SET (VOIDmode, res,
30155 gen_rtx_MULT (mode, e2, e3)));
30156 }
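/* Illustrative sketch (not part of the original source): the scalar C
   equivalent of the sequence above, where rsqrt () stands for the hardware
   RSQRTSS/RSQRTPS estimate (an assumed helper, not a real function):

     float swsqrt_sketch (float a, int recip)
     {
       float x0 = rsqrt (a);              /+ ~12-bit 1/sqrt(a) estimate +/
       float e0 = x0 * a;
       float e1 = e0 * x0;                /+ a * x0 * x0, close to 1.0  +/
       float e2 = e1 - 3.0f;              /+ e1 + mthree                +/
       float e3 = (recip ? x0 : e0) * -0.5f;
       return e2 * e3;                    /+ sqrt(a) or 1/sqrt(a)       +/
     }

   The zero-input mask emitted for the non-reciprocal case is omitted here
   for brevity, and nested comment delimiters are written as "/+ +/".  */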
30157
30158 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
30159
30160 static void ATTRIBUTE_UNUSED
30161 i386_solaris_elf_named_section (const char *name, unsigned int flags,
30162 tree decl)
30163 {
30164 /* With Binutils 2.15, the "@unwind" marker must be specified on
30165 every occurrence of the ".eh_frame" section, not just the first
30166 one. */
30167 if (TARGET_64BIT
30168 && strcmp (name, ".eh_frame") == 0)
30169 {
30170 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
30171 flags & SECTION_WRITE ? "aw" : "a");
30172 return;
30173 }
30174 default_elf_asm_named_section (name, flags, decl);
30175 }
30176
30177 /* Return the mangling of TYPE if it is an extended fundamental type. */
30178
30179 static const char *
30180 ix86_mangle_type (const_tree type)
30181 {
30182 type = TYPE_MAIN_VARIANT (type);
30183
30184 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
30185 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
30186 return NULL;
30187
30188 switch (TYPE_MODE (type))
30189 {
30190 case TFmode:
30191 /* __float128 is "g". */
30192 return "g";
30193 case XFmode:
30194 /* "long double" or __float80 is "e". */
30195 return "e";
30196 default:
30197 return NULL;
30198 }
30199 }
30200
30201 /* For 32-bit code we can save PIC register setup by using
30202 __stack_chk_fail_local hidden function instead of calling
30203    __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
30204 register, so it is better to call __stack_chk_fail directly. */
30205
30206 static tree
30207 ix86_stack_protect_fail (void)
30208 {
30209 return TARGET_64BIT
30210 ? default_external_stack_protect_fail ()
30211 : default_hidden_stack_protect_fail ();
30212 }
30213
30214 /* Select a format to encode pointers in exception handling data. CODE
30215 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
30216 true if the symbol may be affected by dynamic relocations.
30217
30218 ??? All x86 object file formats are capable of representing this.
30219 After all, the relocation needed is the same as for the call insn.
30220 Whether or not a particular assembler allows us to enter such, I
30221 guess we'll have to see. */
30222 int
30223 asm_preferred_eh_data_format (int code, int global)
30224 {
30225 if (flag_pic)
30226 {
30227 int type = DW_EH_PE_sdata8;
30228 if (!TARGET_64BIT
30229 || ix86_cmodel == CM_SMALL_PIC
30230 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
30231 type = DW_EH_PE_sdata4;
30232 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
30233 }
30234 if (ix86_cmodel == CM_SMALL
30235 || (ix86_cmodel == CM_MEDIUM && code))
30236 return DW_EH_PE_udata4;
30237 return DW_EH_PE_absptr;
30238 }
30239 \f
30240 /* Expand copysign from SIGN to the positive value ABS_VALUE
30241 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
30242 the sign-bit. */
30243 static void
30244 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
30245 {
30246 enum machine_mode mode = GET_MODE (sign);
30247 rtx sgn = gen_reg_rtx (mode);
30248 if (mask == NULL_RTX)
30249 {
30250 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
30251 if (!VECTOR_MODE_P (mode))
30252 {
30253 /* We need to generate a scalar mode mask in this case. */
30254 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30255 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30256 mask = gen_reg_rtx (mode);
30257 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30258 }
30259 }
30260 else
30261 mask = gen_rtx_NOT (mode, mask);
30262 emit_insn (gen_rtx_SET (VOIDmode, sgn,
30263 gen_rtx_AND (mode, mask, sign)));
30264 emit_insn (gen_rtx_SET (VOIDmode, result,
30265 gen_rtx_IOR (mode, abs_value, sgn)));
30266 }
30267
30268 /* Expand fabs (OP0) and return a new rtx that holds the result. The
30269 mask for masking out the sign-bit is stored in *SMASK, if that is
30270 non-null. */
30271 static rtx
30272 ix86_expand_sse_fabs (rtx op0, rtx *smask)
30273 {
30274 enum machine_mode mode = GET_MODE (op0);
30275 rtx xa, mask;
30276
30277 xa = gen_reg_rtx (mode);
30278 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
30279 if (!VECTOR_MODE_P (mode))
30280 {
30281 /* We need to generate a scalar mode mask in this case. */
30282 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30283 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30284 mask = gen_reg_rtx (mode);
30285 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30286 }
30287 emit_insn (gen_rtx_SET (VOIDmode, xa,
30288 gen_rtx_AND (mode, op0, mask)));
30289
30290 if (smask)
30291 *smask = mask;
30292
30293 return xa;
30294 }
30295
30296 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
30297 swapping the operands if SWAP_OPERANDS is true. The expanded
30298 code is a forward jump to a newly created label in case the
30299 comparison is true. The generated label rtx is returned. */
30300 static rtx
30301 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
30302 bool swap_operands)
30303 {
30304 rtx label, tmp;
30305
30306 if (swap_operands)
30307 {
30308 tmp = op0;
30309 op0 = op1;
30310 op1 = tmp;
30311 }
30312
30313 label = gen_label_rtx ();
30314 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
30315 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30316 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
30317 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
30318 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
30319 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
30320 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
30321 JUMP_LABEL (tmp) = label;
30322
30323 return label;
30324 }
30325
30326 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
30327 using comparison code CODE. Operands are swapped for the comparison if
30328 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
30329 static rtx
30330 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
30331 bool swap_operands)
30332 {
30333 enum machine_mode mode = GET_MODE (op0);
30334 rtx mask = gen_reg_rtx (mode);
30335
30336 if (swap_operands)
30337 {
30338 rtx tmp = op0;
30339 op0 = op1;
30340 op1 = tmp;
30341 }
30342
30343 if (mode == DFmode)
30344 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
30345 gen_rtx_fmt_ee (code, mode, op0, op1)));
30346 else
30347 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
30348 gen_rtx_fmt_ee (code, mode, op0, op1)));
30349
30350 return mask;
30351 }
30352
30353 /* Generate and return an rtx of mode MODE for 2**N, where N is the number
30354 of explicitly stored mantissa bits of MODE (52 for DFmode, 23 for SFmode). */
30355 static rtx
30356 ix86_gen_TWO52 (enum machine_mode mode)
30357 {
30358 REAL_VALUE_TYPE TWO52r;
30359 rtx TWO52;
30360
30361 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
30362 TWO52 = const_double_from_real_value (TWO52r, mode);
30363 TWO52 = force_reg (mode, TWO52);
30364
30365 return TWO52;
30366 }
30367
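/* The constant built here drives the rounding trick used by several of the
   expanders below: for any double x with 0 <= x < 2**52, the sum x + 2**52
   lies in [2**52, 2**53), where the spacing between doubles is exactly 1.0,
   so the addition itself rounds away the fraction bits (to nearest-even by
   default) and subtracting 2**52 again is exact.  A minimal scalar sketch of
   the idea -- purely illustrative, the helper below is not part of the
   compiler:  */
#if 0
static double
two52_round_sketch (double x)          /* assumes 0.0 <= x < 0x1p52 */
{
  const double two52 = 4503599627370496.0;  /* 2**52 */
  double t = x + two52;                /* fraction bits rounded away */
  return t - two52;                    /* nearest (even) integer to x */
}
#endif
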
30368 /* Expand SSE sequence for computing lround from OP1 storing
30369 into OP0. */
30370 void
30371 ix86_expand_lround (rtx op0, rtx op1)
30372 {
30373 /* C code for the stuff we're doing below:
30374 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
30375 return (long)tmp;
30376 */
30377 enum machine_mode mode = GET_MODE (op1);
30378 const struct real_format *fmt;
30379 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30380 rtx adj;
30381
30382 /* load nextafter (0.5, 0.0) */
30383 fmt = REAL_MODE_FORMAT (mode);
30384 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30385 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30386
30387 /* adj = copysign (0.5, op1) */
30388 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
30389 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
30390
30391 /* adj = op1 + adj */
30392 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
30393
30394 /* op0 = (imode)adj */
30395 expand_fix (op0, adj, 0);
30396 }
30397
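/* Scalar model of the sequence above -- illustrative only, not compiler
   code.  Adding a value just below 0.5 (with the sign of the input) and
   truncating gives round-half-away-from-zero; using an exact 0.5 would be
   wrong for inputs just below one half, where x + 0.5 rounds up to 1.0.  */
#if 0
static long
lround_sketch (double x)
{
  double pred_half = 0.49999999999999994;  /* nextafter (0.5, 0.0) */
  double adj = x < 0.0 ? -pred_half : pred_half;
  return (long) (x + adj);                 /* truncates toward zero */
}
#endif
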
30398 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
30399 into OPERAND0. */
30400 void
30401 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
30402 {
30403 /* C code for the stuff we're doing below (for do_floor):
30404 xi = (long)op1;
30405 xi -= (double)xi > op1 ? 1 : 0;
30406 return xi;
30407 */
30408 enum machine_mode fmode = GET_MODE (op1);
30409 enum machine_mode imode = GET_MODE (op0);
30410 rtx ireg, freg, label, tmp;
30411
30412 /* reg = (long)op1 */
30413 ireg = gen_reg_rtx (imode);
30414 expand_fix (ireg, op1, 0);
30415
30416 /* freg = (double)reg */
30417 freg = gen_reg_rtx (fmode);
30418 expand_float (freg, ireg, 0);
30419
30420 /* ireg = (freg > op1) ? ireg - 1 : ireg */
30421 label = ix86_expand_sse_compare_and_jump (UNLE,
30422 freg, op1, !do_floor);
30423 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
30424 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
30425 emit_move_insn (ireg, tmp);
30426
30427 emit_label (label);
30428 LABEL_NUSES (label) = 1;
30429
30430 emit_move_insn (op0, ireg);
30431 }
30432
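/* Scalar model of the compensation above -- illustrative only.  The initial
   conversion truncates toward zero, so floor steps back down whenever the
   re-converted value overshoots the input (negative non-integers), and ceil
   steps up whenever it undershoots (positive non-integers).  */
#if 0
static long
lfloorceil_sketch (double x, int do_floor)
{
  long xi = (long) x;                  /* truncates toward zero */
  if (do_floor)
    return xi - ((double) xi > x);
  else
    return xi + ((double) xi < x);
}
#endif
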
30433 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
30434 result in OPERAND0. */
30435 void
30436 ix86_expand_rint (rtx operand0, rtx operand1)
30437 {
30438 /* C code for the stuff we're doing below:
30439 xa = fabs (operand1);
30440 if (!isless (xa, 2**52))
30441 return operand1;
30442 xa = xa + 2**52 - 2**52;
30443 return copysign (xa, operand1);
30444 */
30445 enum machine_mode mode = GET_MODE (operand0);
30446 rtx res, xa, label, TWO52, mask;
30447
30448 res = gen_reg_rtx (mode);
30449 emit_move_insn (res, operand1);
30450
30451 /* xa = abs (operand1) */
30452 xa = ix86_expand_sse_fabs (res, &mask);
30453
30454 /* if (!isless (xa, TWO52)) goto label; */
30455 TWO52 = ix86_gen_TWO52 (mode);
30456 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30457
30458 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30459 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30460
30461 ix86_sse_copysign_to_positive (res, xa, res, mask);
30462
30463 emit_label (label);
30464 LABEL_NUSES (label) = 1;
30465
30466 emit_move_insn (operand0, res);
30467 }
30468
30469 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
30470 into OPERAND0 without relying on DImode truncation via cvttsd2siq. */
30471 void
30472 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
30473 {
30474 /* C code for the stuff we expand below.
30475 double xa = fabs (x), x2;
30476 if (!isless (xa, TWO52))
30477 return x;
30478 xa = xa + TWO52 - TWO52;
30479 x2 = copysign (xa, x);
30480 Compensate. Floor:
30481 if (x2 > x)
30482 x2 -= 1;
30483 Compensate. Ceil:
30484 if (x2 < x)
30485 x2 -= -1;
30486 return x2;
30487 */
30488 enum machine_mode mode = GET_MODE (operand0);
30489 rtx xa, TWO52, tmp, label, one, res, mask;
30490
30491 TWO52 = ix86_gen_TWO52 (mode);
30492
30493 /* Temporary for holding the result, initialized to the input
30494 operand to ease control flow. */
30495 res = gen_reg_rtx (mode);
30496 emit_move_insn (res, operand1);
30497
30498 /* xa = abs (operand1) */
30499 xa = ix86_expand_sse_fabs (res, &mask);
30500
30501 /* if (!isless (xa, TWO52)) goto label; */
30502 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30503
30504 /* xa = xa + TWO52 - TWO52; */
30505 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30506 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30507
30508 /* xa = copysign (xa, operand1) */
30509 ix86_sse_copysign_to_positive (xa, xa, res, mask);
30510
30511 /* generate 1.0 or -1.0 */
30512 one = force_reg (mode,
30513 const_double_from_real_value (do_floor
30514 ? dconst1 : dconstm1, mode));
30515
30516 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30517 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30518 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30519 gen_rtx_AND (mode, one, tmp)));
30520 /* We always need to subtract here to preserve signed zero. */
30521 tmp = expand_simple_binop (mode, MINUS,
30522 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30523 emit_move_insn (res, tmp);
30524
30525 emit_label (label);
30526 LABEL_NUSES (label) = 1;
30527
30528 emit_move_insn (operand0, res);
30529 }
30530
30531 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
30532 into OPERAND0. */
30533 void
30534 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
30535 {
30536 /* C code for the stuff we expand below.
30537 double xa = fabs (x), x2;
30538 if (!isless (xa, TWO52))
30539 return x;
30540 x2 = (double)(long)x;
30541 Compensate. Floor:
30542 if (x2 > x)
30543 x2 -= 1;
30544 Compensate. Ceil:
30545 if (x2 < x)
30546 x2 += 1;
30547 if (HONOR_SIGNED_ZEROS (mode))
30548 return copysign (x2, x);
30549 return x2;
30550 */
30551 enum machine_mode mode = GET_MODE (operand0);
30552 rtx xa, xi, TWO52, tmp, label, one, res, mask;
30553
30554 TWO52 = ix86_gen_TWO52 (mode);
30555
30556 /* Temporary for holding the result, initialized to the input
30557 operand to ease control flow. */
30558 res = gen_reg_rtx (mode);
30559 emit_move_insn (res, operand1);
30560
30561 /* xa = abs (operand1) */
30562 xa = ix86_expand_sse_fabs (res, &mask);
30563
30564 /* if (!isless (xa, TWO52)) goto label; */
30565 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30566
30567 /* xa = (double)(long)x */
30568 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30569 expand_fix (xi, res, 0);
30570 expand_float (xa, xi, 0);
30571
30572 /* generate 1.0 */
30573 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30574
30575 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30576 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30577 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30578 gen_rtx_AND (mode, one, tmp)));
30579 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
30580 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30581 emit_move_insn (res, tmp);
30582
30583 if (HONOR_SIGNED_ZEROS (mode))
30584 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30585
30586 emit_label (label);
30587 LABEL_NUSES (label) = 1;
30588
30589 emit_move_insn (operand0, res);
30590 }
30591
30592 /* Expand SSE sequence for computing round from OPERAND1 storing
30593 into OPERAND0. Sequence that works without relying on DImode truncation
30594 via cvttsd2siq that is only available on 64bit targets. */
30595 void
30596 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
30597 {
30598 /* C code for the stuff we expand below.
30599 double xa = fabs (x), xa2, x2;
30600 if (!isless (xa, TWO52))
30601 return x;
30602 Using the absolute value and copying back sign makes
30603 -0.0 -> -0.0 correct.
30604 xa2 = xa + TWO52 - TWO52;
30605 Compensate.
30606 dxa = xa2 - xa;
30607 if (dxa <= -0.5)
30608 xa2 += 1;
30609 else if (dxa > 0.5)
30610 xa2 -= 1;
30611 x2 = copysign (xa2, x);
30612 return x2;
30613 */
30614 enum machine_mode mode = GET_MODE (operand0);
30615 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
30616
30617 TWO52 = ix86_gen_TWO52 (mode);
30618
30619 /* Temporary for holding the result, initialized to the input
30620 operand to ease control flow. */
30621 res = gen_reg_rtx (mode);
30622 emit_move_insn (res, operand1);
30623
30624 /* xa = abs (operand1) */
30625 xa = ix86_expand_sse_fabs (res, &mask);
30626
30627 /* if (!isless (xa, TWO52)) goto label; */
30628 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30629
30630 /* xa2 = xa + TWO52 - TWO52; */
30631 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30632 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
30633
30634 /* dxa = xa2 - xa; */
30635 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
30636
30637 /* generate 0.5, 1.0 and -0.5 */
30638 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
30639 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
30640 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
30641 0, OPTAB_DIRECT);
30642
30643 /* Compensate. */
30644 tmp = gen_reg_rtx (mode);
30645 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
30646 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
30647 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30648 gen_rtx_AND (mode, one, tmp)));
30649 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30650 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
30651 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
30652 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30653 gen_rtx_AND (mode, one, tmp)));
30654 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30655
30656 /* res = copysign (xa2, operand1) */
30657 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
30658
30659 emit_label (label);
30660 LABEL_NUSES (label) = 1;
30661
30662 emit_move_insn (operand0, res);
30663 }
30664
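/* Scalar model of the 32-bit-safe round above -- illustrative only; the
   emitted sequence additionally preserves the sign of -0.0 through the sign
   mask, which this sketch glosses over.  dxa records how far the
   nearest-even addition of TWO52 moved the value, so a halfway case that was
   rounded toward zero gets pushed away from zero, matching round().  */
#if 0
static double
rounddf32_sketch (double x)            /* assumes fabs (x) < 0x1p52 */
{
  const double two52 = 4503599627370496.0;
  double xa = x < 0.0 ? -x : x;
  double xa2 = xa + two52 - two52;     /* nearest-even integer */
  double dxa = xa2 - xa;
  if (dxa > 0.5)                       /* mirrors the emitted compare */
    xa2 -= 1.0;
  else if (dxa <= -0.5)                /* tie rounded down: push it up */
    xa2 += 1.0;
  return x < 0.0 ? -xa2 : xa2;
}
#endif
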
30665 /* Expand SSE sequence for computing trunc from OPERAND1 storing
30666 into OPERAND0. */
30667 void
30668 ix86_expand_trunc (rtx operand0, rtx operand1)
30669 {
30670 /* C code for SSE variant we expand below.
30671 double xa = fabs (x), x2;
30672 if (!isless (xa, TWO52))
30673 return x;
30674 x2 = (double)(long)x;
30675 if (HONOR_SIGNED_ZEROS (mode))
30676 return copysign (x2, x);
30677 return x2;
30678 */
30679 enum machine_mode mode = GET_MODE (operand0);
30680 rtx xa, xi, TWO52, label, res, mask;
30681
30682 TWO52 = ix86_gen_TWO52 (mode);
30683
30684 /* Temporary for holding the result, initialized to the input
30685 operand to ease control flow. */
30686 res = gen_reg_rtx (mode);
30687 emit_move_insn (res, operand1);
30688
30689 /* xa = abs (operand1) */
30690 xa = ix86_expand_sse_fabs (res, &mask);
30691
30692 /* if (!isless (xa, TWO52)) goto label; */
30693 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30694
30695 /* x = (double)(long)x */
30696 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30697 expand_fix (xi, res, 0);
30698 expand_float (res, xi, 0);
30699
30700 if (HONOR_SIGNED_ZEROS (mode))
30701 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30702
30703 emit_label (label);
30704 LABEL_NUSES (label) = 1;
30705
30706 emit_move_insn (operand0, res);
30707 }
30708
30709 /* Expand SSE sequence for computing trunc from OPERAND1 storing
30710 into OPERAND0 without relying on DImode truncation via cvttsd2siq. */
30711 void
30712 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
30713 {
30714 enum machine_mode mode = GET_MODE (operand0);
30715 rtx xa, mask, TWO52, label, one, res, smask, tmp;
30716
30717 /* C code for SSE variant we expand below.
30718 double xa = fabs (x), x2;
30719 if (!isless (xa, TWO52))
30720 return x;
30721 xa2 = xa + TWO52 - TWO52;
30722 Compensate:
30723 if (xa2 > xa)
30724 xa2 -= 1.0;
30725 x2 = copysign (xa2, x);
30726 return x2;
30727 */
30728
30729 TWO52 = ix86_gen_TWO52 (mode);
30730
30731 /* Temporary for holding the result, initialized to the input
30732 operand to ease control flow. */
30733 res = gen_reg_rtx (mode);
30734 emit_move_insn (res, operand1);
30735
30736 /* xa = abs (operand1) */
30737 xa = ix86_expand_sse_fabs (res, &smask);
30738
30739 /* if (!isless (xa, TWO52)) goto label; */
30740 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30741
30742 /* res = xa + TWO52 - TWO52; */
30743 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30744 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
30745 emit_move_insn (res, tmp);
30746
30747 /* generate 1.0 */
30748 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30749
30750 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
30751 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
30752 emit_insn (gen_rtx_SET (VOIDmode, mask,
30753 gen_rtx_AND (mode, mask, one)));
30754 tmp = expand_simple_binop (mode, MINUS,
30755 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
30756 emit_move_insn (res, tmp);
30757
30758 /* res = copysign (res, operand1) */
30759 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
30760
30761 emit_label (label);
30762 LABEL_NUSES (label) = 1;
30763
30764 emit_move_insn (operand0, res);
30765 }
30766
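/* Scalar model of the sequence above -- illustrative only.  The TWO52
   addition rounds to the nearest integer, so stepping back by one whenever
   that rounding went up turns it into truncation of the absolute value.  */
#if 0
static double
truncdf32_sketch (double x)            /* assumes fabs (x) < 0x1p52 */
{
  const double two52 = 4503599627370496.0;
  double xa = x < 0.0 ? -x : x;
  double xa2 = xa + two52 - two52;     /* nearest integer to xa */
  if (xa2 > xa)                        /* rounded up: undo to truncate */
    xa2 -= 1.0;
  return x < 0.0 ? -xa2 : xa2;
}
#endif
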
30767 /* Expand SSE sequence for computing round from OPERAND1 storing
30768 into OPERAND0. */
30769 void
30770 ix86_expand_round (rtx operand0, rtx operand1)
30771 {
30772 /* C code for the stuff we're doing below:
30773 double xa = fabs (x);
30774 if (!isless (xa, TWO52))
30775 return x;
30776 xa = (double)(long)(xa + nextafter (0.5, 0.0));
30777 return copysign (xa, x);
30778 */
30779 enum machine_mode mode = GET_MODE (operand0);
30780 rtx res, TWO52, xa, label, xi, half, mask;
30781 const struct real_format *fmt;
30782 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30783
30784 /* Temporary for holding the result, initialized to the input
30785 operand to ease control flow. */
30786 res = gen_reg_rtx (mode);
30787 emit_move_insn (res, operand1);
30788
30789 TWO52 = ix86_gen_TWO52 (mode);
30790 xa = ix86_expand_sse_fabs (res, &mask);
30791 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30792
30793 /* load nextafter (0.5, 0.0) */
30794 fmt = REAL_MODE_FORMAT (mode);
30795 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30796 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30797
30798 /* xa = xa + 0.5 */
30799 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
30800 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
30801
30802 /* xa = (double)(int64_t)xa */
30803 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30804 expand_fix (xi, xa, 0);
30805 expand_float (xa, xi, 0);
30806
30807 /* res = copysign (xa, operand1) */
30808 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
30809
30810 emit_label (label);
30811 LABEL_NUSES (label) = 1;
30812
30813 emit_move_insn (operand0, res);
30814 }
30815 \f
30816
30817 /* Table of valid machine attributes. */
30818 static const struct attribute_spec ix86_attribute_table[] =
30819 {
30820 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30821 /* Stdcall attribute says callee is responsible for popping arguments
30822 if they are not variable. */
30823 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30824 /* Fastcall attribute says callee is responsible for popping arguments
30825 if they are not variable. */
30826 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30827 /* Thiscall attribute says callee is responsible for popping arguments
30828 if they are not variable. */
30829 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30830 /* Cdecl attribute says the callee is a normal C declaration */
30831 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30832 /* Regparm attribute specifies how many integer arguments are to be
30833 passed in registers. */
30834 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30835 /* Sseregparm attribute says we are using x86_64 calling conventions
30836 for FP arguments. */
30837 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30838 /* force_align_arg_pointer says this function realigns the stack at entry. */
30839 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30840 false, true, true, ix86_handle_cconv_attribute },
30841 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30842 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30843 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30844 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30845 #endif
30846 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30847 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30848 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30849 SUBTARGET_ATTRIBUTE_TABLE,
30850 #endif
30851 /* ms_abi and sysv_abi calling convention function attributes. */
30852 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30853 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30854 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
30855 /* End element. */
30856 { NULL, 0, 0, false, false, false, NULL }
30857 };
30858
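/* Illustrative user-level declarations that exercise some of the calling
   convention attributes above; the names are made up for the example.  On
   32-bit targets fastcall passes the first two integer arguments in ECX/EDX,
   regparm (3) uses EAX/EDX/ECX, and ms_struct selects the MS structure
   layout.  */
#if 0
int __attribute__ ((fastcall)) example_f (int a, int b);
int __attribute__ ((regparm (3))) example_g (int a, int b, int c);
struct __attribute__ ((ms_struct)) example_s { char c; int i; };
#endif
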
30859 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30860 static int
30861 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
30862 tree vectype ATTRIBUTE_UNUSED,
30863 int misalign ATTRIBUTE_UNUSED)
30864 {
30865 switch (type_of_cost)
30866 {
30867 case scalar_stmt:
30868 return ix86_cost->scalar_stmt_cost;
30869
30870 case scalar_load:
30871 return ix86_cost->scalar_load_cost;
30872
30873 case scalar_store:
30874 return ix86_cost->scalar_store_cost;
30875
30876 case vector_stmt:
30877 return ix86_cost->vec_stmt_cost;
30878
30879 case vector_load:
30880 return ix86_cost->vec_align_load_cost;
30881
30882 case vector_store:
30883 return ix86_cost->vec_store_cost;
30884
30885 case vec_to_scalar:
30886 return ix86_cost->vec_to_scalar_cost;
30887
30888 case scalar_to_vec:
30889 return ix86_cost->scalar_to_vec_cost;
30890
30891 case unaligned_load:
30892 case unaligned_store:
30893 return ix86_cost->vec_unalign_load_cost;
30894
30895 case cond_branch_taken:
30896 return ix86_cost->cond_taken_branch_cost;
30897
30898 case cond_branch_not_taken:
30899 return ix86_cost->cond_not_taken_branch_cost;
30900
30901 case vec_perm:
30902 return 1;
30903
30904 default:
30905 gcc_unreachable ();
30906 }
30907 }
30908
30909
30910 /* Implement targetm.vectorize.builtin_vec_perm. */
30911
30912 static tree
30913 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
30914 {
30915 tree itype = TREE_TYPE (vec_type);
30916 bool u = TYPE_UNSIGNED (itype);
30917 enum machine_mode vmode = TYPE_MODE (vec_type);
30918 enum ix86_builtins fcode;
30919 bool ok = TARGET_SSE2;
30920
30921 switch (vmode)
30922 {
30923 case V4DFmode:
30924 ok = TARGET_AVX;
30925 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
30926 goto get_di;
30927 case V2DFmode:
30928 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
30929 get_di:
30930 itype = ix86_get_builtin_type (IX86_BT_DI);
30931 break;
30932
30933 case V8SFmode:
30934 ok = TARGET_AVX;
30935 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
30936 goto get_si;
30937 case V4SFmode:
30938 ok = TARGET_SSE;
30939 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
30940 get_si:
30941 itype = ix86_get_builtin_type (IX86_BT_SI);
30942 break;
30943
30944 case V2DImode:
30945 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
30946 break;
30947 case V4SImode:
30948 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
30949 break;
30950 case V8HImode:
30951 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
30952 break;
30953 case V16QImode:
30954 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
30955 break;
30956 default:
30957 ok = false;
30958 break;
30959 }
30960
30961 if (!ok)
30962 return NULL_TREE;
30963
30964 *mask_type = itype;
30965 return ix86_builtins[(int) fcode];
30966 }
30967
30968 /* Return a vector mode with twice as many elements as VMODE. */
30969 /* ??? Consider moving this to a table generated by genmodes.c. */
30970
30971 static enum machine_mode
30972 doublesize_vector_mode (enum machine_mode vmode)
30973 {
30974 switch (vmode)
30975 {
30976 case V2SFmode: return V4SFmode;
30977 case V1DImode: return V2DImode;
30978 case V2SImode: return V4SImode;
30979 case V4HImode: return V8HImode;
30980 case V8QImode: return V16QImode;
30981
30982 case V2DFmode: return V4DFmode;
30983 case V4SFmode: return V8SFmode;
30984 case V2DImode: return V4DImode;
30985 case V4SImode: return V8SImode;
30986 case V8HImode: return V16HImode;
30987 case V16QImode: return V32QImode;
30988
30989 case V4DFmode: return V8DFmode;
30990 case V8SFmode: return V16SFmode;
30991 case V4DImode: return V8DImode;
30992 case V8SImode: return V16SImode;
30993 case V16HImode: return V32HImode;
30994 case V32QImode: return V64QImode;
30995
30996 default:
30997 gcc_unreachable ();
30998 }
30999 }
31000
31001 /* Construct (set target (vec_select op0 (parallel perm))) and
31002 return true if that's a valid instruction in the active ISA. */
31003
31004 static bool
31005 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
31006 {
31007 rtx rperm[MAX_VECT_LEN], x;
31008 unsigned i;
31009
31010 for (i = 0; i < nelt; ++i)
31011 rperm[i] = GEN_INT (perm[i]);
31012
31013 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
31014 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
31015 x = gen_rtx_SET (VOIDmode, target, x);
31016
31017 x = emit_insn (x);
31018 if (recog_memoized (x) < 0)
31019 {
31020 remove_insn (x);
31021 return false;
31022 }
31023 return true;
31024 }
31025
31026 /* Similar, but generate a vec_concat from op0 and op1 as well. */
31027
31028 static bool
31029 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
31030 const unsigned char *perm, unsigned nelt)
31031 {
31032 enum machine_mode v2mode;
31033 rtx x;
31034
31035 v2mode = doublesize_vector_mode (GET_MODE (op0));
31036 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
31037 return expand_vselect (target, x, perm, nelt);
31038 }
31039
31040 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31041 in terms of blendp[sd] / pblendw / pblendvb. */
31042
31043 static bool
31044 expand_vec_perm_blend (struct expand_vec_perm_d *d)
31045 {
31046 enum machine_mode vmode = d->vmode;
31047 unsigned i, mask, nelt = d->nelt;
31048 rtx target, op0, op1, x;
31049
31050 if (!TARGET_SSE4_1 || d->op0 == d->op1)
31051 return false;
31052 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
31053 return false;
31054
31055 /* This is a blend, not a permute. Elements must stay in their
31056 respective lanes. */
31057 for (i = 0; i < nelt; ++i)
31058 {
31059 unsigned e = d->perm[i];
31060 if (!(e == i || e == i + nelt))
31061 return false;
31062 }
31063
31064 if (d->testing_p)
31065 return true;
31066
31067 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
31068 decision should be extracted elsewhere, so that we only try that
31069 sequence once all budget==3 options have been tried. */
31070
31071 /* For bytes, see if bytes move in pairs so we can use pblendw with
31072 an immediate argument, rather than pblendvb with a vector argument. */
31073 if (vmode == V16QImode)
31074 {
31075 bool pblendw_ok = true;
31076 for (i = 0; i < 16 && pblendw_ok; i += 2)
31077 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
31078
31079 if (!pblendw_ok)
31080 {
31081 rtx rperm[16], vperm;
31082
31083 for (i = 0; i < nelt; ++i)
31084 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
31085
31086 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31087 vperm = force_reg (V16QImode, vperm);
31088
31089 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
31090 return true;
31091 }
31092 }
31093
31094 target = d->target;
31095 op0 = d->op0;
31096 op1 = d->op1;
31097 mask = 0;
31098
31099 switch (vmode)
31100 {
31101 case V4DFmode:
31102 case V8SFmode:
31103 case V2DFmode:
31104 case V4SFmode:
31105 case V8HImode:
31106 for (i = 0; i < nelt; ++i)
31107 mask |= (d->perm[i] >= nelt) << i;
31108 break;
31109
31110 case V2DImode:
31111 for (i = 0; i < 2; ++i)
31112 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
31113 goto do_subreg;
31114
31115 case V4SImode:
31116 for (i = 0; i < 4; ++i)
31117 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
31118 goto do_subreg;
31119
31120 case V16QImode:
31121 for (i = 0; i < 8; ++i)
31122 mask |= (d->perm[i * 2] >= 16) << i;
31123
31124 do_subreg:
31125 vmode = V8HImode;
31126 target = gen_lowpart (vmode, target);
31127 op0 = gen_lowpart (vmode, op0);
31128 op1 = gen_lowpart (vmode, op1);
31129 break;
31130
31131 default:
31132 gcc_unreachable ();
31133 }
31134
31135 /* This matches five different patterns with the different modes. */
31136 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
31137 x = gen_rtx_SET (VOIDmode, target, x);
31138 emit_insn (x);
31139
31140 return true;
31141 }
31142
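/* A sketch of how the blend immediates above are formed; the helper name and
   the bits_per_elt parameter are illustrative only (1 for the modes blended
   directly, 2 for V4SImode and 4 for V2DImode, which are blended as
   V8HImode).  The V16QImode pblendw path instead samples one byte per 16-bit
   word.  */
#if 0
static unsigned
blend_mask_sketch (const unsigned char *perm, unsigned nelt,
                   unsigned bits_per_elt)
{
  unsigned mask = 0, i, j;
  for (i = 0; i < nelt; ++i)
    if (perm[i] >= nelt)               /* element taken from the second operand */
      for (j = 0; j < bits_per_elt; ++j)
        mask |= 1u << (i * bits_per_elt + j);
  return mask;
}
#endif
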
31143 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31144 in terms of the variable form of vpermilps.
31145
31146 Note that we will have already failed the immediate input vpermilps,
31147 which requires that the high and low part shuffle be identical; the
31148 variable form doesn't require that. */
31149
31150 static bool
31151 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
31152 {
31153 rtx rperm[8], vperm;
31154 unsigned i;
31155
31156 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
31157 return false;
31158
31159 /* We can only permute within the 128-bit lane. */
31160 for (i = 0; i < 8; ++i)
31161 {
31162 unsigned e = d->perm[i];
31163 if (i < 4 ? e >= 4 : e < 4)
31164 return false;
31165 }
31166
31167 if (d->testing_p)
31168 return true;
31169
31170 for (i = 0; i < 8; ++i)
31171 {
31172 unsigned e = d->perm[i];
31173
31174 /* Within each 128-bit lane, the elements of op0 are numbered
31175 from 0 and the elements of op1 are numbered from 4. */
31176 if (e >= 8 + 4)
31177 e -= 8;
31178 else if (e >= 4)
31179 e -= 4;
31180
31181 rperm[i] = GEN_INT (e);
31182 }
31183
31184 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
31185 vperm = force_reg (V8SImode, vperm);
31186 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
31187
31188 return true;
31189 }
31190
31191 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31192 in terms of pshufb or vpperm. */
31193
31194 static bool
31195 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
31196 {
31197 unsigned i, nelt, eltsz;
31198 rtx rperm[16], vperm, target, op0, op1;
31199
31200 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
31201 return false;
31202 if (GET_MODE_SIZE (d->vmode) != 16)
31203 return false;
31204
31205 if (d->testing_p)
31206 return true;
31207
31208 nelt = d->nelt;
31209 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31210
31211 for (i = 0; i < nelt; ++i)
31212 {
31213 unsigned j, e = d->perm[i];
31214 for (j = 0; j < eltsz; ++j)
31215 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
31216 }
31217
31218 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31219 vperm = force_reg (V16QImode, vperm);
31220
31221 target = gen_lowpart (V16QImode, d->target);
31222 op0 = gen_lowpart (V16QImode, d->op0);
31223 if (d->op0 == d->op1)
31224 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
31225 else
31226 {
31227 op1 = gen_lowpart (V16QImode, d->op1);
31228 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
31229 }
31230
31231 return true;
31232 }
31233
31234 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
31235 in a single instruction. */
31236
31237 static bool
31238 expand_vec_perm_1 (struct expand_vec_perm_d *d)
31239 {
31240 unsigned i, nelt = d->nelt;
31241 unsigned char perm2[MAX_VECT_LEN];
31242
31243 /* Check plain VEC_SELECT first, because AVX has instructions that could
31244 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
31245 input where SEL+CONCAT may not. */
31246 if (d->op0 == d->op1)
31247 {
31248 int mask = nelt - 1;
31249
31250 for (i = 0; i < nelt; i++)
31251 perm2[i] = d->perm[i] & mask;
31252
31253 if (expand_vselect (d->target, d->op0, perm2, nelt))
31254 return true;
31255
31256 /* There are plenty of patterns in sse.md that are written for
31257 SEL+CONCAT and are not replicated for a single op. Perhaps
31258 that should be changed, to avoid the nastiness here. */
31259
31260 /* Recognize interleave style patterns, which means incrementing
31261 every other permutation operand. */
31262 for (i = 0; i < nelt; i += 2)
31263 {
31264 perm2[i] = d->perm[i] & mask;
31265 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
31266 }
31267 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31268 return true;
31269
31270 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
31271 if (nelt >= 4)
31272 {
31273 for (i = 0; i < nelt; i += 4)
31274 {
31275 perm2[i + 0] = d->perm[i + 0] & mask;
31276 perm2[i + 1] = d->perm[i + 1] & mask;
31277 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
31278 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
31279 }
31280
31281 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31282 return true;
31283 }
31284 }
31285
31286 /* Finally, try the fully general two operand permute. */
31287 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
31288 return true;
31289
31290 /* Recognize interleave style patterns with reversed operands. */
31291 if (d->op0 != d->op1)
31292 {
31293 for (i = 0; i < nelt; ++i)
31294 {
31295 unsigned e = d->perm[i];
31296 if (e >= nelt)
31297 e -= nelt;
31298 else
31299 e += nelt;
31300 perm2[i] = e;
31301 }
31302
31303 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
31304 return true;
31305 }
31306
31307 /* Try the SSE4.1 blend variable merge instructions. */
31308 if (expand_vec_perm_blend (d))
31309 return true;
31310
31311 /* Try one of the AVX vpermil variable permutations. */
31312 if (expand_vec_perm_vpermil (d))
31313 return true;
31314
31315 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
31316 if (expand_vec_perm_pshufb (d))
31317 return true;
31318
31319 return false;
31320 }
31321
31322 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31323 in terms of a pair of pshuflw + pshufhw instructions. */
31324
31325 static bool
31326 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
31327 {
31328 unsigned char perm2[MAX_VECT_LEN];
31329 unsigned i;
31330 bool ok;
31331
31332 if (d->vmode != V8HImode || d->op0 != d->op1)
31333 return false;
31334
31335 /* The two permutations only operate in 64-bit lanes. */
31336 for (i = 0; i < 4; ++i)
31337 if (d->perm[i] >= 4)
31338 return false;
31339 for (i = 4; i < 8; ++i)
31340 if (d->perm[i] < 4)
31341 return false;
31342
31343 if (d->testing_p)
31344 return true;
31345
31346 /* Emit the pshuflw. */
31347 memcpy (perm2, d->perm, 4);
31348 for (i = 4; i < 8; ++i)
31349 perm2[i] = i;
31350 ok = expand_vselect (d->target, d->op0, perm2, 8);
31351 gcc_assert (ok);
31352
31353 /* Emit the pshufhw. */
31354 memcpy (perm2 + 4, d->perm + 4, 4);
31355 for (i = 0; i < 4; ++i)
31356 perm2[i] = i;
31357 ok = expand_vselect (d->target, d->target, perm2, 8);
31358 gcc_assert (ok);
31359
31360 return true;
31361 }
31362
31363 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31364 the permutation using the SSSE3 palignr instruction. This succeeds
31365 when all of the elements in PERM fit within one vector and we merely
31366 need to shift them down so that a single vector permutation has a
31367 chance to succeed. */
31368
31369 static bool
31370 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
31371 {
31372 unsigned i, nelt = d->nelt;
31373 unsigned min, max;
31374 bool in_order, ok;
31375 rtx shift;
31376
31377 /* Even with AVX, palignr only operates on 128-bit vectors. */
31378 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31379 return false;
31380
31381 min = nelt, max = 0;
31382 for (i = 0; i < nelt; ++i)
31383 {
31384 unsigned e = d->perm[i];
31385 if (e < min)
31386 min = e;
31387 if (e > max)
31388 max = e;
31389 }
31390 if (min == 0 || max - min >= nelt)
31391 return false;
31392
31393 /* Given that we have SSSE3, we know we'll be able to implement the
31394 single operand permutation after the palignr with pshufb. */
31395 if (d->testing_p)
31396 return true;
31397
31398 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
31399 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
31400 gen_lowpart (TImode, d->op1),
31401 gen_lowpart (TImode, d->op0), shift));
31402
31403 d->op0 = d->op1 = d->target;
31404
31405 in_order = true;
31406 for (i = 0; i < nelt; ++i)
31407 {
31408 unsigned e = d->perm[i] - min;
31409 if (e != i)
31410 in_order = false;
31411 d->perm[i] = e;
31412 }
31413
31414 /* Test for the degenerate case where the alignment by itself
31415 produces the desired permutation. */
31416 if (in_order)
31417 return true;
31418
31419 ok = expand_vec_perm_1 (d);
31420 gcc_assert (ok);
31421
31422 return ok;
31423 }
31424
31425 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31426 a two vector permutation into a single vector permutation by using
31427 an interleave operation to merge the vectors. */
31428
31429 static bool
31430 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
31431 {
31432 struct expand_vec_perm_d dremap, dfinal;
31433 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
31434 unsigned contents, h1, h2, h3, h4;
31435 unsigned char remap[2 * MAX_VECT_LEN];
31436 rtx seq;
31437 bool ok;
31438
31439 if (d->op0 == d->op1)
31440 return false;
31441
31442 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
31443 lanes. We can use similar techniques with the vperm2f128 instruction,
31444 but it requires slightly different logic. */
31445 if (GET_MODE_SIZE (d->vmode) != 16)
31446 return false;
31447
31448 /* Examine from whence the elements come. */
31449 contents = 0;
31450 for (i = 0; i < nelt; ++i)
31451 contents |= 1u << d->perm[i];
31452
31453 /* Split the two input vectors into 4 halves. */
31454 h1 = (1u << nelt2) - 1;
31455 h2 = h1 << nelt2;
31456 h3 = h2 << nelt2;
31457 h4 = h3 << nelt2;
31458
31459 memset (remap, 0xff, sizeof (remap));
31460 dremap = *d;
31461
31462 /* If the elements all come from the low halves, use interleave low;
31463 similarly for interleave high.  If the elements come from mismatched
31464 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
31465 if ((contents & (h1 | h3)) == contents)
31466 {
31467 for (i = 0; i < nelt2; ++i)
31468 {
31469 remap[i] = i * 2;
31470 remap[i + nelt] = i * 2 + 1;
31471 dremap.perm[i * 2] = i;
31472 dremap.perm[i * 2 + 1] = i + nelt;
31473 }
31474 }
31475 else if ((contents & (h2 | h4)) == contents)
31476 {
31477 for (i = 0; i < nelt2; ++i)
31478 {
31479 remap[i + nelt2] = i * 2;
31480 remap[i + nelt + nelt2] = i * 2 + 1;
31481 dremap.perm[i * 2] = i + nelt2;
31482 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
31483 }
31484 }
31485 else if ((contents & (h1 | h4)) == contents)
31486 {
31487 for (i = 0; i < nelt2; ++i)
31488 {
31489 remap[i] = i;
31490 remap[i + nelt + nelt2] = i + nelt2;
31491 dremap.perm[i] = i;
31492 dremap.perm[i + nelt2] = i + nelt + nelt2;
31493 }
31494 if (nelt != 4)
31495 {
31496 dremap.vmode = V2DImode;
31497 dremap.nelt = 2;
31498 dremap.perm[0] = 0;
31499 dremap.perm[1] = 3;
31500 }
31501 }
31502 else if ((contents & (h2 | h3)) == contents)
31503 {
31504 for (i = 0; i < nelt2; ++i)
31505 {
31506 remap[i + nelt2] = i;
31507 remap[i + nelt] = i + nelt2;
31508 dremap.perm[i] = i + nelt2;
31509 dremap.perm[i + nelt2] = i + nelt;
31510 }
31511 if (nelt != 4)
31512 {
31513 dremap.vmode = V2DImode;
31514 dremap.nelt = 2;
31515 dremap.perm[0] = 1;
31516 dremap.perm[1] = 2;
31517 }
31518 }
31519 else
31520 return false;
31521
31522 /* Use the remapping array set up above to move the elements from their
31523 swizzled locations into their final destinations. */
31524 dfinal = *d;
31525 for (i = 0; i < nelt; ++i)
31526 {
31527 unsigned e = remap[d->perm[i]];
31528 gcc_assert (e < nelt);
31529 dfinal.perm[i] = e;
31530 }
31531 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
31532 dfinal.op1 = dfinal.op0;
31533 dremap.target = dfinal.op0;
31534
31535 /* Test if the final remap can be done with a single insn. For V4SFmode or
31536 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
31537 start_sequence ();
31538 ok = expand_vec_perm_1 (&dfinal);
31539 seq = get_insns ();
31540 end_sequence ();
31541
31542 if (!ok)
31543 return false;
31544
31545 if (dremap.vmode != dfinal.vmode)
31546 {
31547 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
31548 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
31549 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
31550 }
31551
31552 ok = expand_vec_perm_1 (&dremap);
31553 gcc_assert (ok);
31554
31555 emit_insn (seq);
31556 return true;
31557 }
31558
31559 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
31560 permutation with two pshufb insns and an ior. We should have already
31561 failed all two instruction sequences. */
31562
31563 static bool
31564 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
31565 {
31566 rtx rperm[2][16], vperm, l, h, op, m128;
31567 unsigned int i, nelt, eltsz;
31568
31569 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31570 return false;
31571 gcc_assert (d->op0 != d->op1);
31572
31573 nelt = d->nelt;
31574 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31575
31576 /* Generate two permutation masks. If the required element is within
31577 the given vector it is shuffled into the proper lane. If the required
31578 element is in the other vector, force a zero into the lane by setting
31579 bit 7 in the permutation mask. */
31580 m128 = GEN_INT (-128);
31581 for (i = 0; i < nelt; ++i)
31582 {
31583 unsigned j, e = d->perm[i];
31584 unsigned which = (e >= nelt);
31585 if (e >= nelt)
31586 e -= nelt;
31587
31588 for (j = 0; j < eltsz; ++j)
31589 {
31590 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
31591 rperm[1-which][i*eltsz + j] = m128;
31592 }
31593 }
31594
31595 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
31596 vperm = force_reg (V16QImode, vperm);
31597
31598 l = gen_reg_rtx (V16QImode);
31599 op = gen_lowpart (V16QImode, d->op0);
31600 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
31601
31602 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
31603 vperm = force_reg (V16QImode, vperm);
31604
31605 h = gen_reg_rtx (V16QImode);
31606 op = gen_lowpart (V16QImode, d->op1);
31607 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
31608
31609 op = gen_lowpart (V16QImode, d->target);
31610 emit_insn (gen_iorv16qi3 (op, l, h));
31611
31612 return true;
31613 }
31614
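/* A sketch of how the two byte-selection masks above are built; the helper
   and its parameters are illustrative only.  Each result byte is either the
   index of the wanted byte within its own operand or -128, which makes
   pshufb produce zero so that the final ior keeps the byte supplied by the
   other operand.  */
#if 0
static void
pshufb2_masks_sketch (const unsigned char *perm, unsigned nelt, unsigned eltsz,
                      signed char mask0[16], signed char mask1[16])
{
  unsigned i, j;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = perm[i], which = e >= nelt;
      if (which)
        e -= nelt;
      for (j = 0; j < eltsz; ++j)
        {
          (which ? mask1 : mask0)[i * eltsz + j] = e * eltsz + j;
          (which ? mask0 : mask1)[i * eltsz + j] = -128;
        }
    }
}
#endif
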
31615 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
31616 and extract-odd permutations. */
31617
31618 static bool
31619 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
31620 {
31621 rtx t1, t2, t3, t4;
31622
31623 switch (d->vmode)
31624 {
31625 case V4DFmode:
31626 t1 = gen_reg_rtx (V4DFmode);
31627 t2 = gen_reg_rtx (V4DFmode);
31628
31629 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
31630 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
31631 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
31632
31633 /* Now an unpck[lh]pd will produce the result required. */
31634 if (odd)
31635 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
31636 else
31637 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
31638 emit_insn (t3);
31639 break;
31640
31641 case V8SFmode:
31642 {
31643 static const unsigned char perm1[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
31644 static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
31645 static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
31646
31647 t1 = gen_reg_rtx (V8SFmode);
31648 t2 = gen_reg_rtx (V8SFmode);
31649 t3 = gen_reg_rtx (V8SFmode);
31650 t4 = gen_reg_rtx (V8SFmode);
31651
31652 /* Shuffle within the 128-bit lanes to produce:
31653 { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */
31654 expand_vselect (t1, d->op0, perm1, 8);
31655 expand_vselect (t2, d->op1, perm1, 8);
31656
31657 /* Shuffle the lanes around to produce:
31658 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
31659 emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
31660 emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
31661
31662 /* Now a vpermil2p will produce the result required. */
31663 /* ??? The vpermil2p requires a vector constant. Another option
31664 is a unpck[lh]ps to merge the two vectors to produce
31665 { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }. Then use another
31666 vpermilps to get the elements into the final order. */
31667 d->op0 = t3;
31668 d->op1 = t4;
31669 memcpy (d->perm, odd ? permo: perme, 8);
31670 expand_vec_perm_vpermil (d);
31671 }
31672 break;
31673
31674 case V2DFmode:
31675 case V4SFmode:
31676 case V2DImode:
31677 case V4SImode:
31678 /* These are always directly implementable by expand_vec_perm_1. */
31679 gcc_unreachable ();
31680
31681 case V8HImode:
31682 if (TARGET_SSSE3)
31683 return expand_vec_perm_pshufb2 (d);
31684 else
31685 {
31686 /* We need 2*log2(N)-1 operations to achieve odd/even
31687 with interleave. */
31688 t1 = gen_reg_rtx (V8HImode);
31689 t2 = gen_reg_rtx (V8HImode);
31690 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
31691 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
31692 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
31693 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
31694 if (odd)
31695 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
31696 else
31697 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
31698 emit_insn (t3);
31699 }
31700 break;
31701
31702 case V16QImode:
31703 if (TARGET_SSSE3)
31704 return expand_vec_perm_pshufb2 (d);
31705 else
31706 {
31707 t1 = gen_reg_rtx (V16QImode);
31708 t2 = gen_reg_rtx (V16QImode);
31709 t3 = gen_reg_rtx (V16QImode);
31710 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
31711 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
31712 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
31713 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
31714 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
31715 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
31716 if (odd)
31717 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
31718 else
31719 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
31720 emit_insn (t3);
31721 }
31722 break;
31723
31724 default:
31725 gcc_unreachable ();
31726 }
31727
31728 return true;
31729 }
31730
31731 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31732 extract-even and extract-odd permutations. */
31733
31734 static bool
31735 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
31736 {
31737 unsigned i, odd, nelt = d->nelt;
31738
31739 odd = d->perm[0];
31740 if (odd != 0 && odd != 1)
31741 return false;
31742
31743 for (i = 1; i < nelt; ++i)
31744 if (d->perm[i] != 2 * i + odd)
31745 return false;
31746
31747 return expand_vec_perm_even_odd_1 (d, odd);
31748 }
31749
31750 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
31751 permutations. We assume that expand_vec_perm_1 has already failed. */
31752
31753 static bool
31754 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
31755 {
31756 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
31757 enum machine_mode vmode = d->vmode;
31758 unsigned char perm2[4];
31759 rtx op0 = d->op0;
31760 bool ok;
31761
31762 switch (vmode)
31763 {
31764 case V4DFmode:
31765 case V8SFmode:
31766 /* These are special-cased in sse.md so that we can optionally
31767 use the vbroadcast instruction. They expand to two insns
31768 if the input happens to be in a register. */
31769 gcc_unreachable ();
31770
31771 case V2DFmode:
31772 case V2DImode:
31773 case V4SFmode:
31774 case V4SImode:
31775 /* These are always implementable using standard shuffle patterns. */
31776 gcc_unreachable ();
31777
31778 case V8HImode:
31779 case V16QImode:
31780 /* These can be implemented via interleave. We save one insn by
31781 stopping once we have promoted to V4SImode and then use pshufd. */
31782 do
31783 {
31784 optab otab = vec_interleave_low_optab;
31785
31786 if (elt >= nelt2)
31787 {
31788 otab = vec_interleave_high_optab;
31789 elt -= nelt2;
31790 }
31791 nelt2 /= 2;
31792
31793 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
31794 vmode = get_mode_wider_vector (vmode);
31795 op0 = gen_lowpart (vmode, op0);
31796 }
31797 while (vmode != V4SImode);
31798
31799 memset (perm2, elt, 4);
31800 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
31801 gcc_assert (ok);
31802 return true;
31803
31804 default:
31805 gcc_unreachable ();
31806 }
31807 }
31808
31809 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31810 broadcast permutations. */
31811
31812 static bool
31813 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
31814 {
31815 unsigned i, elt, nelt = d->nelt;
31816
31817 if (d->op0 != d->op1)
31818 return false;
31819
31820 elt = d->perm[0];
31821 for (i = 1; i < nelt; ++i)
31822 if (d->perm[i] != elt)
31823 return false;
31824
31825 return expand_vec_perm_broadcast_1 (d);
31826 }
31827
31828 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
31829 With all of the interface bits taken care of, perform the expansion
31830 in D and return true on success. */
31831
31832 static bool
31833 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
31834 {
31835 /* Try a single instruction expansion. */
31836 if (expand_vec_perm_1 (d))
31837 return true;
31838
31839 /* Try sequences of two instructions. */
31840
31841 if (expand_vec_perm_pshuflw_pshufhw (d))
31842 return true;
31843
31844 if (expand_vec_perm_palignr (d))
31845 return true;
31846
31847 if (expand_vec_perm_interleave2 (d))
31848 return true;
31849
31850 if (expand_vec_perm_broadcast (d))
31851 return true;
31852
31853 /* Try sequences of three instructions. */
31854
31855 if (expand_vec_perm_pshufb2 (d))
31856 return true;
31857
31858 /* ??? Look for narrow permutations whose element orderings would
31859 allow the promotion to a wider mode. */
31860
31861 /* ??? Look for sequences of interleave or a wider permute that place
31862 the data into the correct lanes for a half-vector shuffle like
31863 pshuf[lh]w or vpermilps. */
31864
31865 /* ??? Look for sequences of interleave that produce the desired results.
31866 The combinatorics of punpck[lh] get pretty ugly... */
31867
31868 if (expand_vec_perm_even_odd (d))
31869 return true;
31870
31871 return false;
31872 }
31873
31874 /* Extract the values from the vector CST into the permutation array in D.
31875 Return 0 on error, 1 if all values from the permutation come from the
31876 first vector, 2 if all values from the second vector, and 3 otherwise. */
31877
31878 static int
31879 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
31880 {
31881 tree list = TREE_VECTOR_CST_ELTS (cst);
31882 unsigned i, nelt = d->nelt;
31883 int ret = 0;
31884
31885 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
31886 {
31887 unsigned HOST_WIDE_INT e;
31888
31889 if (!host_integerp (TREE_VALUE (list), 1))
31890 return 0;
31891 e = tree_low_cst (TREE_VALUE (list), 1);
31892 if (e >= 2 * nelt)
31893 return 0;
31894
31895 ret |= (e < nelt ? 1 : 2);
31896 d->perm[i] = e;
31897 }
31898 gcc_assert (list == NULL);
31899
31900 /* For all elements from second vector, fold the elements to first. */
31901 if (ret == 2)
31902 for (i = 0; i < nelt; ++i)
31903 d->perm[i] -= nelt;
31904
31905 return ret;
31906 }
31907
31908 static rtx
31909 ix86_expand_vec_perm_builtin (tree exp)
31910 {
31911 struct expand_vec_perm_d d;
31912 tree arg0, arg1, arg2;
31913
31914 arg0 = CALL_EXPR_ARG (exp, 0);
31915 arg1 = CALL_EXPR_ARG (exp, 1);
31916 arg2 = CALL_EXPR_ARG (exp, 2);
31917
31918 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
31919 d.nelt = GET_MODE_NUNITS (d.vmode);
31920 d.testing_p = false;
31921 gcc_assert (VECTOR_MODE_P (d.vmode));
31922
31923 if (TREE_CODE (arg2) != VECTOR_CST)
31924 {
31925 error_at (EXPR_LOCATION (exp),
31926 "vector permutation requires vector constant");
31927 goto exit_error;
31928 }
31929
31930 switch (extract_vec_perm_cst (&d, arg2))
31931 {
31932 default:
31933 gcc_unreachable();
31934
31935 case 0:
31936 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
31937 goto exit_error;
31938
31939 case 3:
31940 if (!operand_equal_p (arg0, arg1, 0))
31941 {
31942 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31943 d.op0 = force_reg (d.vmode, d.op0);
31944 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31945 d.op1 = force_reg (d.vmode, d.op1);
31946 break;
31947 }
31948
31949 /* The elements of PERM do not suggest that only the first operand
31950 is used, but both operands are identical. Allow easier matching
31951 of the permutation by folding the permutation into the single
31952 input vector. */
31953 {
31954 unsigned i, nelt = d.nelt;
31955 for (i = 0; i < nelt; ++i)
31956 if (d.perm[i] >= nelt)
31957 d.perm[i] -= nelt;
31958 }
31959 /* FALLTHRU */
31960
31961 case 1:
31962 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31963 d.op0 = force_reg (d.vmode, d.op0);
31964 d.op1 = d.op0;
31965 break;
31966
31967 case 2:
31968 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31969 d.op0 = force_reg (d.vmode, d.op0);
31970 d.op1 = d.op0;
31971 break;
31972 }
31973
31974 d.target = gen_reg_rtx (d.vmode);
31975 if (ix86_expand_vec_perm_builtin_1 (&d))
31976 return d.target;
31977
31978 /* For compiler generated permutations, we should never get here, because
31979 the compiler should also be checking the ok hook.  But since this is a
31980 builtin the user has access to, don't abort. */
31981 switch (d.nelt)
31982 {
31983 case 2:
31984 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
31985 break;
31986 case 4:
31987 sorry ("vector permutation (%d %d %d %d)",
31988 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
31989 break;
31990 case 8:
31991 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
31992 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31993 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
31994 break;
31995 case 16:
31996 sorry ("vector permutation "
31997 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
31998 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31999 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
32000 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
32001 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
32002 break;
32003 default:
32004 gcc_unreachable ();
32005 }
32006 exit_error:
32007 return CONST0_RTX (d.vmode);
32008 }
32009
32010 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
32011
32012 static bool
32013 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
32014 {
32015 struct expand_vec_perm_d d;
32016 int vec_mask;
32017 bool ret, one_vec;
32018
32019 d.vmode = TYPE_MODE (vec_type);
32020 d.nelt = GET_MODE_NUNITS (d.vmode);
32021 d.testing_p = true;
32022
32023 /* Given sufficient ISA support we can just return true here
32024 for selected vector modes. */
32025 if (GET_MODE_SIZE (d.vmode) == 16)
32026 {
32027 /* All implementable with a single vpperm insn. */
32028 if (TARGET_XOP)
32029 return true;
32030 /* All implementable with 2 pshufb + 1 ior. */
32031 if (TARGET_SSSE3)
32032 return true;
32033 /* All implementable with shufpd or unpck[lh]pd. */
32034 if (d.nelt == 2)
32035 return true;
32036 }
32037
32038 vec_mask = extract_vec_perm_cst (&d, mask);
32039
32040 /* This hook cannot be called in response to something that the
32041 user does (unlike the builtin expander), so we shouldn't ever see
32042 an error generated from the extract. */
32043 gcc_assert (vec_mask > 0 && vec_mask <= 3);
32044 one_vec = (vec_mask != 3);
32045
32046 /* Implementable with shufps or pshufd. */
32047 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
32048 return true;
32049
32050 /* Otherwise we have to go through the motions and see if we can
32051 figure out how to generate the requested permutation. */
32052 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32053 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32054 if (!one_vec)
32055 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32056
32057 start_sequence ();
32058 ret = ix86_expand_vec_perm_builtin_1 (&d);
32059 end_sequence ();
32060
32061 return ret;
32062 }
32063
32064 void
32065 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
32066 {
32067 struct expand_vec_perm_d d;
32068 unsigned i, nelt;
32069
32070 d.target = targ;
32071 d.op0 = op0;
32072 d.op1 = op1;
32073 d.vmode = GET_MODE (targ);
32074 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
32075 d.testing_p = false;
32076
32077 for (i = 0; i < nelt; ++i)
32078 d.perm[i] = i * 2 + odd;
32079
32080 /* We'll either be able to implement the permutation directly... */
32081 if (expand_vec_perm_1 (&d))
32082 return;
32083
32084 /* ... or we use the special-case patterns. */
32085 expand_vec_perm_even_odd_1 (&d, odd);
32086 }
32087 \f
32088 /* This function returns the calling-ABI-specific va_list type node,
32089 i.e. the va_list type appropriate for FNDECL. */
32090
32091 static tree
32092 ix86_fn_abi_va_list (tree fndecl)
32093 {
32094 if (!TARGET_64BIT)
32095 return va_list_type_node;
32096 gcc_assert (fndecl != NULL_TREE);
32097
32098 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
32099 return ms_va_list_type_node;
32100 else
32101 return sysv_va_list_type_node;
32102 }
32103
32104 /* Returns the canonical va_list type specified by TYPE. If there
32105 is no valid TYPE provided, it returns NULL_TREE. */
32106
32107 static tree
32108 ix86_canonical_va_list_type (tree type)
32109 {
32110 tree wtype, htype;
32111
32112 /* Resolve references and pointers to va_list type. */
32113 if (TREE_CODE (type) == MEM_REF)
32114 type = TREE_TYPE (type);
32115 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
32116 type = TREE_TYPE (type);
32117 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
32118 type = TREE_TYPE (type);
32119
32120 if (TARGET_64BIT)
32121 {
32122 wtype = va_list_type_node;
32123 gcc_assert (wtype != NULL_TREE);
32124 htype = type;
32125 if (TREE_CODE (wtype) == ARRAY_TYPE)
32126 {
32127 /* If va_list is an array type, the argument may have decayed
32128 to a pointer type, e.g. by being passed to another function.
32129 In that case, unwrap both types so that we can compare the
32130 underlying records. */
32131 if (TREE_CODE (htype) == ARRAY_TYPE
32132 || POINTER_TYPE_P (htype))
32133 {
32134 wtype = TREE_TYPE (wtype);
32135 htype = TREE_TYPE (htype);
32136 }
32137 }
32138 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32139 return va_list_type_node;
32140 wtype = sysv_va_list_type_node;
32141 gcc_assert (wtype != NULL_TREE);
32142 htype = type;
32143 if (TREE_CODE (wtype) == ARRAY_TYPE)
32144 {
32145 /* If va_list is an array type, the argument may have decayed
32146 to a pointer type, e.g. by being passed to another function.
32147 In that case, unwrap both types so that we can compare the
32148 underlying records. */
32149 if (TREE_CODE (htype) == ARRAY_TYPE
32150 || POINTER_TYPE_P (htype))
32151 {
32152 wtype = TREE_TYPE (wtype);
32153 htype = TREE_TYPE (htype);
32154 }
32155 }
32156 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32157 return sysv_va_list_type_node;
32158 wtype = ms_va_list_type_node;
32159 gcc_assert (wtype != NULL_TREE);
32160 htype = type;
32161 if (TREE_CODE (wtype) == ARRAY_TYPE)
32162 {
32163 /* If va_list is an array type, the argument may have decayed
32164 to a pointer type, e.g. by being passed to another function.
32165 In that case, unwrap both types so that we can compare the
32166 underlying records. */
32167 if (TREE_CODE (htype) == ARRAY_TYPE
32168 || POINTER_TYPE_P (htype))
32169 {
32170 wtype = TREE_TYPE (wtype);
32171 htype = TREE_TYPE (htype);
32172 }
32173 }
32174 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32175 return ms_va_list_type_node;
32176 return NULL_TREE;
32177 }
32178 return std_canonical_va_list_type (type);
32179 }
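
/* For illustration, a minimal user-level sketch (the identifiers below
   are the documented GCC builtins, not anything defined in this file):
   on x86-64 a varargs function compiled for the MS ABI uses the
   Windows-style va_list,

       int __attribute__ ((ms_abi))
       sum (int n, ...)
       {
         __builtin_ms_va_list ap;
         int i, s = 0;
         __builtin_ms_va_start (ap, n);
         for (i = 0; i < n; i++)
           s += __builtin_va_arg (ap, int);
         __builtin_ms_va_end (ap);
         return s;
       }

   For such a function ix86_fn_abi_va_list returns ms_va_list_type_node,
   while a SysV function gets sysv_va_list_type_node (the default
   va_list).  ix86_canonical_va_list_type above maps an argument type
   that may have decayed to a pointer back onto one of these canonical
   nodes.  */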
32180
32181 /* Iterate through the target-specific builtin types for va_list.
32182 IDX denotes the iterator, *PTREE is set to the type of the
32183 va_list builtin and *PNAME to its internal name.
32184 Returns zero if there is no element for this index, otherwise
32185 IDX should be increased upon the next call.
32186 Note, do not iterate a base builtin's name like __builtin_va_list.
32187 Used from c_common_nodes_and_builtins. */
32188
32189 static int
32190 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
32191 {
32192 if (TARGET_64BIT)
32193 {
32194 switch (idx)
32195 {
32196 default:
32197 break;
32198
32199 case 0:
32200 *ptree = ms_va_list_type_node;
32201 *pname = "__builtin_ms_va_list";
32202 return 1;
32203
32204 case 1:
32205 *ptree = sysv_va_list_type_node;
32206 *pname = "__builtin_sysv_va_list";
32207 return 1;
32208 }
32209 }
32210
32211 return 0;
32212 }
32213
32214 #undef TARGET_SCHED_DISPATCH
32215 #define TARGET_SCHED_DISPATCH has_dispatch
32216 #undef TARGET_SCHED_DISPATCH_DO
32217 #define TARGET_SCHED_DISPATCH_DO do_dispatch
32218
32219 /* The size of the dispatch window is the total number of bytes of
32220 object code allowed in a window. */
32221 #define DISPATCH_WINDOW_SIZE 16
32222
32223 /* Number of dispatch windows considered for scheduling. */
32224 #define MAX_DISPATCH_WINDOWS 3
32225
32226 /* Maximum number of instructions in a window. */
32227 #define MAX_INSN 4
32228
32229 /* Maximum number of immediate operands in a window. */
32230 #define MAX_IMM 4
32231
32232 /* Maximum number of immediate bits allowed in a window. */
32233 #define MAX_IMM_SIZE 128
32234
32235 /* Maximum number of 32 bit immediates allowed in a window. */
32236 #define MAX_IMM_32 4
32237
32238 /* Maximum number of 64 bit immediates allowed in a window. */
32239 #define MAX_IMM_64 2
32240
32241 /* Maximum total of loads or prefetches allowed in a window. */
32242 #define MAX_LOAD 2
32243
32244 /* Maximum total of stores allowed in a window. */
32245 #define MAX_STORE 1
32246
32247 #undef BIG
32248 #define BIG 100
32249
32250
32251 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
32252 enum dispatch_group {
32253 disp_no_group = 0,
32254 disp_load,
32255 disp_store,
32256 disp_load_store,
32257 disp_prefetch,
32258 disp_imm,
32259 disp_imm_32,
32260 disp_imm_64,
32261 disp_branch,
32262 disp_cmp,
32263 disp_jcc,
32264 disp_last
32265 };
32266
32267 /* Number of allowable groups in a dispatch window. It is an array
32268 indexed by the dispatch_group enum. 100 is used as a big number
32269 because the number of these kinds of operations has no effect on
32270 the dispatch window, but we still need entries for them in the
32271 table. */
32272 static unsigned int num_allowable_groups[disp_last] = {
32273 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
32274 };
32275
32276 char group_name[disp_last + 1][16] = {
32277 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
32278 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
32279 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
32280 };
32281
32282 /* Instruction path. */
32283 enum insn_path {
32284 no_path = 0,
32285 path_single, /* Single micro op. */
32286 path_double, /* Double micro op. */
32287 path_multi, /* Instructions with more than 2 micro ops. */
32288 last_path
32289 };
32290
32291 /* sched_insn_info describes one entry of a dispatch window: the insn
32292 scheduled into the window, its dispatch group and decode path, and
32293 its length in bytes and in immediate bytes.
32294
32295 Windows are allocated for each basic block and are linked
32296 together. */
32297 typedef struct sched_insn_info_s {
32298 rtx insn;
32299 enum dispatch_group group;
32300 enum insn_path path;
32301 int byte_len;
32302 int imm_bytes;
32303 } sched_insn_info;
32304
32305 /* Linked list of dispatch windows. This is a two-way list of
32306 dispatch windows of a basic block. It contains information about
32307 the number of uops in the window and the total number of
32308 instructions and of bytes in the object code for this dispatch
32309 window. */
32310 typedef struct dispatch_windows_s {
32311 int num_insn; /* Number of insn in the window. */
32312 int num_uops; /* Number of uops in the window. */
32313 int window_size; /* Number of bytes in the window. */
32314 int window_num; /* Window number, either 0 or 1. */
32315 int num_imm; /* Number of immediates in an insn. */
32316 int num_imm_32; /* Number of 32 bit immediates in an insn. */
32317 int num_imm_64; /* Number of 64 bit immediates in an insn. */
32318 int imm_size; /* Total immediates in the window. */
32319 int num_loads; /* Total memory loads in the window. */
32320 int num_stores; /* Total memory stores in the window. */
32321 int violation; /* Violation exists in window. */
32322 sched_insn_info *window; /* Pointer to the window. */
32323 struct dispatch_windows_s *next;
32324 struct dispatch_windows_s *prev;
32325 } dispatch_windows;
32326
32327 /* Immediate values used in an insn. */
32328 typedef struct imm_info_s
32329 {
32330 int imm;
32331 int imm32;
32332 int imm64;
32333 } imm_info;
32334
32335 static dispatch_windows *dispatch_window_list;
32336 static dispatch_windows *dispatch_window_list1;
32337
32338 /* Get the memory dispatch group of INSN (load, store, both, or none). */
32339
32340 static enum dispatch_group
32341 get_mem_group (rtx insn)
32342 {
32343 enum attr_memory memory;
32344
32345 if (INSN_CODE (insn) < 0)
32346 return disp_no_group;
32347 memory = get_attr_memory (insn);
32348 if (memory == MEMORY_STORE)
32349 return disp_store;
32350
32351 if (memory == MEMORY_LOAD)
32352 return disp_load;
32353
32354 if (memory == MEMORY_BOTH)
32355 return disp_load_store;
32356
32357 return disp_no_group;
32358 }
32359
32360 /* Return true if insn is a compare instruction. */
32361
32362 static bool
32363 is_cmp (rtx insn)
32364 {
32365 enum attr_type type;
32366
32367 type = get_attr_type (insn);
32368 return (type == TYPE_TEST
32369 || type == TYPE_ICMP
32370 || type == TYPE_FCMP
32371 || GET_CODE (PATTERN (insn)) == COMPARE);
32372 }
32373
32374 /* Return true if a dispatch violation was encountered. */
32375
32376 static bool
32377 dispatch_violation (void)
32378 {
32379 if (dispatch_window_list->next)
32380 return dispatch_window_list->next->violation;
32381 return dispatch_window_list->violation;
32382 }
32383
32384 /* Return true if insn is a branch instruction. */
32385
32386 static bool
32387 is_branch (rtx insn)
32388 {
32389 return (CALL_P (insn) || JUMP_P (insn));
32390 }
32391
32392 /* Return true if insn is a prefetch instruction. */
32393
32394 static bool
32395 is_prefetch (rtx insn)
32396 {
32397 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
32398 }
32399
32400 /* This function initializes a dispatch window and the list container holding a
32401 pointer to the window. */
32402
32403 static void
32404 init_window (int window_num)
32405 {
32406 int i;
32407 dispatch_windows *new_list;
32408
32409 if (window_num == 0)
32410 new_list = dispatch_window_list;
32411 else
32412 new_list = dispatch_window_list1;
32413
32414 new_list->num_insn = 0;
32415 new_list->num_uops = 0;
32416 new_list->window_size = 0;
32417 new_list->next = NULL;
32418 new_list->prev = NULL;
32419 new_list->window_num = window_num;
32420 new_list->num_imm = 0;
32421 new_list->num_imm_32 = 0;
32422 new_list->num_imm_64 = 0;
32423 new_list->imm_size = 0;
32424 new_list->num_loads = 0;
32425 new_list->num_stores = 0;
32426 new_list->violation = false;
32427
32428 for (i = 0; i < MAX_INSN; i++)
32429 {
32430 new_list->window[i].insn = NULL;
32431 new_list->window[i].group = disp_no_group;
32432 new_list->window[i].path = no_path;
32433 new_list->window[i].byte_len = 0;
32434 new_list->window[i].imm_bytes = 0;
32435 }
32436 return;
32437 }
32438
32439 /* This function allocates and initializes a dispatch window and the
32440 list container holding a pointer to the window. */
32441
32442 static dispatch_windows *
32443 allocate_window (void)
32444 {
32445 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
32446 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
32447
32448 return new_list;
32449 }
32450
32451 /* This routine initializes the dispatch scheduling information. It
32452 initiates building dispatch scheduler tables and constructs the
32453 first dispatch window. */
32454
32455 static void
32456 init_dispatch_sched (void)
32457 {
32458 /* Allocate a dispatch list and a window. */
32459 dispatch_window_list = allocate_window ();
32460 dispatch_window_list1 = allocate_window ();
32461 init_window (0);
32462 init_window (1);
32463 }
32464
32465 /* This function returns true if a branch is detected. The end of a basic
32466 block does not have to be a branch, but here we assume only branches
32467 end a window. */
32468
32469 static bool
32470 is_end_basic_block (enum dispatch_group group)
32471 {
32472 return group == disp_branch;
32473 }
32474
32475 /* This function is called when the end of a window's processing is reached. */
32476
32477 static void
32478 process_end_window (void)
32479 {
32480 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
32481 if (dispatch_window_list->next)
32482 {
32483 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
32484 gcc_assert (dispatch_window_list->window_size
32485 + dispatch_window_list1->window_size <= 48);
32486 init_window (1);
32487 }
32488 init_window (0);
32489 }
32490
32491 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
32492 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
32493 for 48 bytes of instructions. Note that these windows are not the
32494 dispatch windows whose size is DISPATCH_WINDOW_SIZE. */
32495
32496 static dispatch_windows *
32497 allocate_next_window (int window_num)
32498 {
32499 if (window_num == 0)
32500 {
32501 if (dispatch_window_list->next)
32502 init_window (1);
32503 init_window (0);
32504 return dispatch_window_list;
32505 }
32506
32507 dispatch_window_list->next = dispatch_window_list1;
32508 dispatch_window_list1->prev = dispatch_window_list;
32509
32510 return dispatch_window_list1;
32511 }
32512
32513 /* Count an immediate operand found in *IN_RTX; callback for for_each_rtx. */
32514
32515 static int
32516 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
32517 {
32518 if (*in_rtx == 0)
32519 return 0;
32520
32521 switch ( GET_CODE (*in_rtx))
32522 {
32523 case CONST:
32524 case SYMBOL_REF:
32525 case CONST_INT:
32526 (imm_values->imm)++;
32527 if (x86_64_immediate_operand (*in_rtx, SImode))
32528 (imm_values->imm32)++;
32529 else
32530 (imm_values->imm64)++;
32531 break;
32532
32533 case CONST_DOUBLE:
32534 (imm_values->imm)++;
32535 (imm_values->imm64)++;
32536 break;
32537
32538 case CODE_LABEL:
32539 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
32540 {
32541 (imm_values->imm)++;
32542 (imm_values->imm32)++;
32543 }
32544 break;
32545
32546 default:
32547 break;
32548 }
32549
32550 return 0;
32551 }
32552
32553 /* Compute number of immediate operands of an instruction. */
32554
32555 static void
32556 find_constant (rtx in_rtx, imm_info *imm_values)
32557 {
32558 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
32559 (rtx_function) find_constant_1, (void *) imm_values);
32560 }
32561
32562 /* Return the total size of the immediate operands of an instruction along
32563 with the number of corresponding immediate operands. It initializes the
32564 counts to zero before calling FIND_CONSTANT.
32565 INSN is the input instruction. IMM is the total of immediates.
32566 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
32567 bit immediates. */
32568
32569 static int
32570 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
32571 {
32572 imm_info imm_values = {0, 0, 0};
32573
32574 find_constant (insn, &imm_values);
32575 *imm = imm_values.imm;
32576 *imm32 = imm_values.imm32;
32577 *imm64 = imm_values.imm64;
32578 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
32579 }
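
/* For example (informal): an insn that moves the constant
   0x112233445566 into a register carries one immediate that does not
   fit in a signed 32-bit operand, so get_num_immediates sets *IMM = 1,
   *IMM32 = 0, *IMM64 = 1 and returns 8; an insn comparing a register
   against the constant 16 instead yields *IMM = 1, *IMM32 = 1,
   *IMM64 = 0 and returns 4.  */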
32580
32581 /* This function returns true if INSN has at least one immediate
32582 operand. */
32583
32584 static bool
32585 has_immediate (rtx insn)
32586 {
32587 int num_imm_operand;
32588 int num_imm32_operand;
32589 int num_imm64_operand;
32590
32591 if (insn)
32592 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32593 &num_imm64_operand);
32594 return false;
32595 }
32596
32597 /* Return the decode path (single, double or multi uop) for INSN. */
32598
32599 static enum insn_path
32600 get_insn_path (rtx insn)
32601 {
32602 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
32603
32604 if ((int)path == 0)
32605 return path_single;
32606
32607 if ((int)path == 1)
32608 return path_double;
32609
32610 return path_multi;
32611 }
32612
32613 /* Return insn dispatch group. */
32614
32615 static enum dispatch_group
32616 get_insn_group (rtx insn)
32617 {
32618 enum dispatch_group group = get_mem_group (insn);
32619 if (group)
32620 return group;
32621
32622 if (is_branch (insn))
32623 return disp_branch;
32624
32625 if (is_cmp (insn))
32626 return disp_cmp;
32627
32628 if (has_immediate (insn))
32629 return disp_imm;
32630
32631 if (is_prefetch (insn))
32632 return disp_prefetch;
32633
32634 return disp_no_group;
32635 }
32636
32637 /* Count the number of GROUP-restricted instructions in the dispatch
32638 window WINDOW_LIST. */
32639
32640 static int
32641 count_num_restricted (rtx insn, dispatch_windows *window_list)
32642 {
32643 enum dispatch_group group = get_insn_group (insn);
32644 int imm_size;
32645 int num_imm_operand;
32646 int num_imm32_operand;
32647 int num_imm64_operand;
32648
32649 if (group == disp_no_group)
32650 return 0;
32651
32652 if (group == disp_imm)
32653 {
32654 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32655 &num_imm64_operand);
32656 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
32657 || num_imm_operand + window_list->num_imm > MAX_IMM
32658 || (num_imm32_operand > 0
32659 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
32660 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
32661 || (num_imm64_operand > 0
32662 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
32663 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
32664 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
32665 && num_imm64_operand > 0
32666 && ((window_list->num_imm_64 > 0
32667 && window_list->num_insn >= 2)
32668 || window_list->num_insn >= 3)))
32669 return BIG;
32670
32671 return 1;
32672 }
32673
32674 if ((group == disp_load_store
32675 && (window_list->num_loads >= MAX_LOAD
32676 || window_list->num_stores >= MAX_STORE))
32677 || ((group == disp_load
32678 || group == disp_prefetch)
32679 && window_list->num_loads >= MAX_LOAD)
32680 || (group == disp_store
32681 && window_list->num_stores >= MAX_STORE))
32682 return BIG;
32683
32684 return 1;
32685 }
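
/* For example (informal): if the current window already holds two insns
   that each carry a 64-bit immediate (num_imm_64 == 2, imm_size == 16),
   another disp_imm insn with a 64-bit immediate makes
   num_imm_64 + 1 > MAX_IMM_64, so count_num_restricted returns BIG and
   fits_dispatch_window below rejects the insn for this window.  */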
32686
32687 /* This function returns true if insn satisfies dispatch rules on the
32688 last window scheduled. */
32689
32690 static bool
32691 fits_dispatch_window (rtx insn)
32692 {
32693 dispatch_windows *window_list = dispatch_window_list;
32694 dispatch_windows *window_list_next = dispatch_window_list->next;
32695 unsigned int num_restrict;
32696 enum dispatch_group group = get_insn_group (insn);
32697 enum insn_path path = get_insn_path (insn);
32698 int sum;
32699
32700 /* Make disp_cmp and disp_jcc get scheduled last. These
32701 instructions should be given the lowest priority in the
32702 Haifa scheduler so that they are scheduled in the same dispatch
32703 window as the reference to them. */
32704 if (group == disp_jcc || group == disp_cmp)
32705 return false;
32706
32707 /* Check nonrestricted. */
32708 if (group == disp_no_group || group == disp_branch)
32709 return true;
32710
32711 /* Get last dispatch window. */
32712 if (window_list_next)
32713 window_list = window_list_next;
32714
32715 if (window_list->window_num == 1)
32716 {
32717 sum = window_list->prev->window_size + window_list->window_size;
32718
32719 if (sum == 32
32720 || (min_insn_size (insn) + sum) >= 48)
32721 /* Window 1 is full. Go for next window. */
32722 return true;
32723 }
32724
32725 num_restrict = count_num_restricted (insn, window_list);
32726
32727 if (num_restrict > num_allowable_groups[group])
32728 return false;
32729
32730 /* See if it fits in the first window. */
32731 if (window_list->window_num == 0)
32732 {
32733 /* The first window should have only single- and double-path
32734 uops. */
32735 if (path == path_double
32736 && (window_list->num_uops + 2) > MAX_INSN)
32737 return false;
32738 else if (path != path_single)
32739 return false;
32740 }
32741 return true;
32742 }
32743
32744 /* Add an instruction INSN with NUM_UOPS micro-operations to the
32745 dispatch window WINDOW_LIST. */
32746
32747 static void
32748 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
32749 {
32750 int byte_len = min_insn_size (insn);
32751 int num_insn = window_list->num_insn;
32752 int imm_size;
32753 sched_insn_info *window = window_list->window;
32754 enum dispatch_group group = get_insn_group (insn);
32755 enum insn_path path = get_insn_path (insn);
32756 int num_imm_operand;
32757 int num_imm32_operand;
32758 int num_imm64_operand;
32759
32760 if (!window_list->violation && group != disp_cmp
32761 && !fits_dispatch_window (insn))
32762 window_list->violation = true;
32763
32764 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32765 &num_imm64_operand);
32766
32767 /* Initialize window with new instruction. */
32768 window[num_insn].insn = insn;
32769 window[num_insn].byte_len = byte_len;
32770 window[num_insn].group = group;
32771 window[num_insn].path = path;
32772 window[num_insn].imm_bytes = imm_size;
32773
32774 window_list->window_size += byte_len;
32775 window_list->num_insn = num_insn + 1;
32776 window_list->num_uops = window_list->num_uops + num_uops;
32777 window_list->imm_size += imm_size;
32778 window_list->num_imm += num_imm_operand;
32779 window_list->num_imm_32 += num_imm32_operand;
32780 window_list->num_imm_64 += num_imm64_operand;
32781
32782 if (group == disp_store)
32783 window_list->num_stores += 1;
32784 else if (group == disp_load
32785 || group == disp_prefetch)
32786 window_list->num_loads += 1;
32787 else if (group == disp_load_store)
32788 {
32789 window_list->num_stores += 1;
32790 window_list->num_loads += 1;
32791 }
32792 }
32793
32794 /* Adds a scheduled instruction, INSN, to the current dispatch window.
32795 If the total bytes of instructions or the number of instructions in
32796 the window exceeds the allowable limit, it allocates a new window. */
32797
32798 static void
32799 add_to_dispatch_window (rtx insn)
32800 {
32801 int byte_len;
32802 dispatch_windows *window_list;
32803 dispatch_windows *next_list;
32804 dispatch_windows *window0_list;
32805 enum insn_path path;
32806 enum dispatch_group insn_group;
32807 bool insn_fits;
32808 int num_insn;
32809 int num_uops;
32810 int window_num;
32811 int insn_num_uops;
32812 int sum;
32813
32814 if (INSN_CODE (insn) < 0)
32815 return;
32816
32817 byte_len = min_insn_size (insn);
32818 window_list = dispatch_window_list;
32819 next_list = window_list->next;
32820 path = get_insn_path (insn);
32821 insn_group = get_insn_group (insn);
32822
32823 /* Get the last dispatch window. */
32824 if (next_list)
32825 window_list = dispatch_window_list->next;
32826
32827 if (path == path_single)
32828 insn_num_uops = 1;
32829 else if (path == path_double)
32830 insn_num_uops = 2;
32831 else
32832 insn_num_uops = (int) path;
32833
32834 /* If the current window is full, get a new window.
32835 Window number zero is full if MAX_INSN uops are scheduled in it.
32836 Window number one is full if window zero's bytes plus window
32837 one's bytes is 32, or if adding the bytes of the new instruction
32838 makes the total greater than 48, or if it already has MAX_INSN
32839 instructions in it. */
32840 num_insn = window_list->num_insn;
32841 num_uops = window_list->num_uops;
32842 window_num = window_list->window_num;
32843 insn_fits = fits_dispatch_window (insn);
32844
32845 if (num_insn >= MAX_INSN
32846 || num_uops + insn_num_uops > MAX_INSN
32847 || !(insn_fits))
32848 {
32849 window_num = ~window_num & 1;
32850 window_list = allocate_next_window (window_num);
32851 }
32852
32853 if (window_num == 0)
32854 {
32855 add_insn_window (insn, window_list, insn_num_uops);
32856 if (window_list->num_insn >= MAX_INSN
32857 && insn_group == disp_branch)
32858 {
32859 process_end_window ();
32860 return;
32861 }
32862 }
32863 else if (window_num == 1)
32864 {
32865 window0_list = window_list->prev;
32866 sum = window0_list->window_size + window_list->window_size;
32867 if (sum == 32
32868 || (byte_len + sum) >= 48)
32869 {
32870 process_end_window ();
32871 window_list = dispatch_window_list;
32872 }
32873
32874 add_insn_window (insn, window_list, insn_num_uops);
32875 }
32876 else
32877 gcc_unreachable ();
32878
32879 if (is_end_basic_block (insn_group))
32880 {
32881 /* The end of the basic block is reached; do end-of-basic-block processing. */
32882 process_end_window ();
32883 return;
32884 }
32885 }
32886
32887 /* Print the dispatch window, WINDOW_NUM, to FILE. */
32888
32889 DEBUG_FUNCTION static void
32890 debug_dispatch_window_file (FILE *file, int window_num)
32891 {
32892 dispatch_windows *list;
32893 int i;
32894
32895 if (window_num == 0)
32896 list = dispatch_window_list;
32897 else
32898 list = dispatch_window_list1;
32899
32900 fprintf (file, "Window #%d:\n", list->window_num);
32901 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
32902 list->num_insn, list->num_uops, list->window_size);
32903 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
32904 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
32905
32906 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
32907 list->num_stores);
32908 fprintf (file, " insn info:\n");
32909
32910 for (i = 0; i < MAX_INSN; i++)
32911 {
32912 if (!list->window[i].insn)
32913 break;
32914 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
32915 i, group_name[list->window[i].group],
32916 i, (void *)list->window[i].insn,
32917 i, list->window[i].path,
32918 i, list->window[i].byte_len,
32919 i, list->window[i].imm_bytes);
32920 }
32921 }
32922
32923 /* Print to stdout a dispatch window. */
32924
32925 DEBUG_FUNCTION void
32926 debug_dispatch_window (int window_num)
32927 {
32928 debug_dispatch_window_file (stdout, window_num);
32929 }
32930
32931 /* Print INSN dispatch information to FILE. */
32932
32933 DEBUG_FUNCTION static void
32934 debug_insn_dispatch_info_file (FILE *file, rtx insn)
32935 {
32936 int byte_len;
32937 enum insn_path path;
32938 enum dispatch_group group;
32939 int imm_size;
32940 int num_imm_operand;
32941 int num_imm32_operand;
32942 int num_imm64_operand;
32943
32944 if (INSN_CODE (insn) < 0)
32945 return;
32946
32947 byte_len = min_insn_size (insn);
32948 path = get_insn_path (insn);
32949 group = get_insn_group (insn);
32950 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32951 &num_imm64_operand);
32952
32953 fprintf (file, " insn info:\n");
32954 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
32955 group_name[group], path, byte_len);
32956 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
32957 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
32958 }
32959
32960 /* Print to STDOUT the status of the ready list with respect to
32961 dispatch windows. */
32962
32963 DEBUG_FUNCTION void
32964 debug_ready_dispatch (void)
32965 {
32966 int i;
32967 int no_ready = number_in_ready ();
32968
32969 fprintf (stdout, "Number of ready: %d\n", no_ready);
32970
32971 for (i = 0; i < no_ready; i++)
32972 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
32973 }
32974
32975 /* This routine is the driver of the dispatch scheduler. */
32976
32977 static void
32978 do_dispatch (rtx insn, int mode)
32979 {
32980 if (mode == DISPATCH_INIT)
32981 init_dispatch_sched ();
32982 else if (mode == ADD_TO_DISPATCH_WINDOW)
32983 add_to_dispatch_window (insn);
32984 }
32985
32986 /* Return TRUE if Dispatch Scheduling is supported. */
32987
32988 static bool
32989 has_dispatch (rtx insn, int action)
32990 {
32991 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
32992 switch (action)
32993 {
32994 default:
32995 return false;
32996
32997 case IS_DISPATCH_ON:
32998 return true;
33000
33001 case IS_CMP:
33002 return is_cmp (insn);
33003
33004 case DISPATCH_VIOLATION:
33005 return dispatch_violation ();
33006
33007 case FITS_DISPATCH_WINDOW:
33008 return fits_dispatch_window (insn);
33009 }
33010
33011 return false;
33012 }
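
/* Note: the dispatch scheduling hooks above only take effect when tuning
   for bdver1 with the dispatch scheduler enabled; an informal usage
   example:

       gcc -O2 -march=bdver1 -mdispatch-scheduler file.c

   -mdispatch-scheduler sets flag_dispatch_scheduler, which has_dispatch
   tests together with ix86_tune == PROCESSOR_BDVER1.  */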
33013
33014 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
33015 place emms and femms instructions. */
33016
33017 static enum machine_mode
33018 ix86_preferred_simd_mode (enum machine_mode mode)
33019 {
33020 /* Disable double precision vectorizer if needed. */
33021 if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
33022 return word_mode;
33023
33024 if (!TARGET_AVX && !TARGET_SSE)
33025 return word_mode;
33026
33027 switch (mode)
33028 {
33029 case SFmode:
33030 return TARGET_AVX ? V8SFmode : V4SFmode;
33031 case DFmode:
33032 return TARGET_AVX ? V4DFmode : V2DFmode;
33033 case DImode:
33034 return V2DImode;
33035 case SImode:
33036 return V4SImode;
33037 case HImode:
33038 return V8HImode;
33039 case QImode:
33040 return V16QImode;
33041
33042 default:;
33043 }
33044
33045 return word_mode;
33046 }
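
/* For example (informal): with SSE but without AVX, SFmode maps to
   V4SFmode, and DFmode to V2DFmode when double-precision vectorization
   is enabled; with AVX they map to V8SFmode and V4DFmode.  A loop such
   as

       void scale (float *a, float s, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           a[i] *= s;
       }

   is therefore vectorized four floats at a time under SSE and eight at
   a time under AVX (assuming the vectorizer is enabled).  */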
33047
33048 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
33049 vectors. */
33050
33051 static unsigned int
33052 ix86_autovectorize_vector_sizes (void)
33053 {
33054 return TARGET_AVX ? 32 | 16 : 0;
33055 }
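
/* Informally: the returned value is a bit mask of vector sizes in bytes,
   so with AVX this is 32 | 16 == 48 and the vectorizer may try 256-bit
   vectors and fall back to 128-bit ones; a return value of 0 means only
   the size of the preferred SIMD mode is considered.  */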
33056
33057 /* Initialize the GCC target structure. */
33058 #undef TARGET_RETURN_IN_MEMORY
33059 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
33060
33061 #undef TARGET_LEGITIMIZE_ADDRESS
33062 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
33063
33064 #undef TARGET_ATTRIBUTE_TABLE
33065 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
33066 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
33067 # undef TARGET_MERGE_DECL_ATTRIBUTES
33068 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
33069 #endif
33070
33071 #undef TARGET_COMP_TYPE_ATTRIBUTES
33072 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
33073
33074 #undef TARGET_INIT_BUILTINS
33075 #define TARGET_INIT_BUILTINS ix86_init_builtins
33076 #undef TARGET_BUILTIN_DECL
33077 #define TARGET_BUILTIN_DECL ix86_builtin_decl
33078 #undef TARGET_EXPAND_BUILTIN
33079 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
33080
33081 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
33082 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
33083 ix86_builtin_vectorized_function
33084
33085 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
33086 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
33087
33088 #undef TARGET_BUILTIN_RECIPROCAL
33089 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
33090
33091 #undef TARGET_ASM_FUNCTION_EPILOGUE
33092 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
33093
33094 #undef TARGET_ENCODE_SECTION_INFO
33095 #ifndef SUBTARGET_ENCODE_SECTION_INFO
33096 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
33097 #else
33098 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
33099 #endif
33100
33101 #undef TARGET_ASM_OPEN_PAREN
33102 #define TARGET_ASM_OPEN_PAREN ""
33103 #undef TARGET_ASM_CLOSE_PAREN
33104 #define TARGET_ASM_CLOSE_PAREN ""
33105
33106 #undef TARGET_ASM_BYTE_OP
33107 #define TARGET_ASM_BYTE_OP ASM_BYTE
33108
33109 #undef TARGET_ASM_ALIGNED_HI_OP
33110 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
33111 #undef TARGET_ASM_ALIGNED_SI_OP
33112 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
33113 #ifdef ASM_QUAD
33114 #undef TARGET_ASM_ALIGNED_DI_OP
33115 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
33116 #endif
33117
33118 #undef TARGET_PROFILE_BEFORE_PROLOGUE
33119 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
33120
33121 #undef TARGET_ASM_UNALIGNED_HI_OP
33122 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
33123 #undef TARGET_ASM_UNALIGNED_SI_OP
33124 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
33125 #undef TARGET_ASM_UNALIGNED_DI_OP
33126 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
33127
33128 #undef TARGET_PRINT_OPERAND
33129 #define TARGET_PRINT_OPERAND ix86_print_operand
33130 #undef TARGET_PRINT_OPERAND_ADDRESS
33131 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
33132 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
33133 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
33134 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
33135 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
33136
33137 #undef TARGET_SCHED_ADJUST_COST
33138 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
33139 #undef TARGET_SCHED_ISSUE_RATE
33140 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
33141 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
33142 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
33143 ia32_multipass_dfa_lookahead
33144
33145 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
33146 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
33147
33148 #ifdef HAVE_AS_TLS
33149 #undef TARGET_HAVE_TLS
33150 #define TARGET_HAVE_TLS true
33151 #endif
33152 #undef TARGET_CANNOT_FORCE_CONST_MEM
33153 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
33154 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
33155 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
33156
33157 #undef TARGET_DELEGITIMIZE_ADDRESS
33158 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
33159
33160 #undef TARGET_MS_BITFIELD_LAYOUT_P
33161 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
33162
33163 #if TARGET_MACHO
33164 #undef TARGET_BINDS_LOCAL_P
33165 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
33166 #endif
33167 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
33168 #undef TARGET_BINDS_LOCAL_P
33169 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
33170 #endif
33171
33172 #undef TARGET_ASM_OUTPUT_MI_THUNK
33173 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
33174 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
33175 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
33176
33177 #undef TARGET_ASM_FILE_START
33178 #define TARGET_ASM_FILE_START x86_file_start
33179
33180 #undef TARGET_DEFAULT_TARGET_FLAGS
33181 #define TARGET_DEFAULT_TARGET_FLAGS \
33182 (TARGET_DEFAULT \
33183 | TARGET_SUBTARGET_DEFAULT \
33184 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
33185 | MASK_FUSED_MADD)
33186
33187 #undef TARGET_HANDLE_OPTION
33188 #define TARGET_HANDLE_OPTION ix86_handle_option
33189
33190 #undef TARGET_OPTION_OVERRIDE
33191 #define TARGET_OPTION_OVERRIDE ix86_option_override
33192 #undef TARGET_OPTION_OPTIMIZATION
33193 #define TARGET_OPTION_OPTIMIZATION ix86_option_optimization
33194
33195 #undef TARGET_REGISTER_MOVE_COST
33196 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
33197 #undef TARGET_MEMORY_MOVE_COST
33198 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
33199 #undef TARGET_RTX_COSTS
33200 #define TARGET_RTX_COSTS ix86_rtx_costs
33201 #undef TARGET_ADDRESS_COST
33202 #define TARGET_ADDRESS_COST ix86_address_cost
33203
33204 #undef TARGET_FIXED_CONDITION_CODE_REGS
33205 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
33206 #undef TARGET_CC_MODES_COMPATIBLE
33207 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
33208
33209 #undef TARGET_MACHINE_DEPENDENT_REORG
33210 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
33211
33212 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
33213 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
33214
33215 #undef TARGET_BUILD_BUILTIN_VA_LIST
33216 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
33217
33218 #undef TARGET_ENUM_VA_LIST_P
33219 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
33220
33221 #undef TARGET_FN_ABI_VA_LIST
33222 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
33223
33224 #undef TARGET_CANONICAL_VA_LIST_TYPE
33225 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
33226
33227 #undef TARGET_EXPAND_BUILTIN_VA_START
33228 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
33229
33230 #undef TARGET_MD_ASM_CLOBBERS
33231 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
33232
33233 #undef TARGET_PROMOTE_PROTOTYPES
33234 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
33235 #undef TARGET_STRUCT_VALUE_RTX
33236 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
33237 #undef TARGET_SETUP_INCOMING_VARARGS
33238 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
33239 #undef TARGET_MUST_PASS_IN_STACK
33240 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
33241 #undef TARGET_FUNCTION_ARG_ADVANCE
33242 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
33243 #undef TARGET_FUNCTION_ARG
33244 #define TARGET_FUNCTION_ARG ix86_function_arg
33245 #undef TARGET_PASS_BY_REFERENCE
33246 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
33247 #undef TARGET_INTERNAL_ARG_POINTER
33248 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
33249 #undef TARGET_UPDATE_STACK_BOUNDARY
33250 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
33251 #undef TARGET_GET_DRAP_RTX
33252 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
33253 #undef TARGET_STRICT_ARGUMENT_NAMING
33254 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
33255 #undef TARGET_STATIC_CHAIN
33256 #define TARGET_STATIC_CHAIN ix86_static_chain
33257 #undef TARGET_TRAMPOLINE_INIT
33258 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
33259 #undef TARGET_RETURN_POPS_ARGS
33260 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
33261
33262 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
33263 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
33264
33265 #undef TARGET_SCALAR_MODE_SUPPORTED_P
33266 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
33267
33268 #undef TARGET_VECTOR_MODE_SUPPORTED_P
33269 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
33270
33271 #undef TARGET_C_MODE_FOR_SUFFIX
33272 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
33273
33274 #ifdef HAVE_AS_TLS
33275 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
33276 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
33277 #endif
33278
33279 #ifdef SUBTARGET_INSERT_ATTRIBUTES
33280 #undef TARGET_INSERT_ATTRIBUTES
33281 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
33282 #endif
33283
33284 #undef TARGET_MANGLE_TYPE
33285 #define TARGET_MANGLE_TYPE ix86_mangle_type
33286
33287 #undef TARGET_STACK_PROTECT_FAIL
33288 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
33289
33290 #undef TARGET_SUPPORTS_SPLIT_STACK
33291 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
33292
33293 #undef TARGET_FUNCTION_VALUE
33294 #define TARGET_FUNCTION_VALUE ix86_function_value
33295
33296 #undef TARGET_FUNCTION_VALUE_REGNO_P
33297 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
33298
33299 #undef TARGET_SECONDARY_RELOAD
33300 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
33301
33302 #undef TARGET_CLASS_LIKELY_SPILLED_P
33303 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
33304
33305 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
33306 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
33307 ix86_builtin_vectorization_cost
33308 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
33309 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
33310 ix86_vectorize_builtin_vec_perm
33311 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
33312 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
33313 ix86_vectorize_builtin_vec_perm_ok
33314 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
33315 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
33316 ix86_preferred_simd_mode
33317 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
33318 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
33319 ix86_autovectorize_vector_sizes
33320
33321 #undef TARGET_SET_CURRENT_FUNCTION
33322 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
33323
33324 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
33325 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
33326
33327 #undef TARGET_OPTION_SAVE
33328 #define TARGET_OPTION_SAVE ix86_function_specific_save
33329
33330 #undef TARGET_OPTION_RESTORE
33331 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
33332
33333 #undef TARGET_OPTION_PRINT
33334 #define TARGET_OPTION_PRINT ix86_function_specific_print
33335
33336 #undef TARGET_CAN_INLINE_P
33337 #define TARGET_CAN_INLINE_P ix86_can_inline_p
33338
33339 #undef TARGET_EXPAND_TO_RTL_HOOK
33340 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
33341
33342 #undef TARGET_LEGITIMATE_ADDRESS_P
33343 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
33344
33345 #undef TARGET_IRA_COVER_CLASSES
33346 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
33347
33348 #undef TARGET_FRAME_POINTER_REQUIRED
33349 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
33350
33351 #undef TARGET_CAN_ELIMINATE
33352 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
33353
33354 #undef TARGET_EXTRA_LIVE_ON_ENTRY
33355 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
33356
33357 #undef TARGET_ASM_CODE_END
33358 #define TARGET_ASM_CODE_END ix86_code_end
33359
33360 struct gcc_target targetm = TARGET_INITIALIZER;
33361 \f
33362 #include "gt-i386.h"