1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "diagnostic-core.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "gimple.h"
51 #include "dwarf2.h"
52 #include "df.h"
53 #include "tm-constrs.h"
54 #include "params.h"
55 #include "cselib.h"
56 #include "debug.h"
57 #include "dwarf2out.h"
58 #include "sched-int.h"
59 static rtx legitimize_dllimport_symbol (rtx, bool);
60
61 #ifndef CHECK_STACK_LIMIT
62 #define CHECK_STACK_LIMIT (-1)
63 #endif
64
65 /* Return index of given mode in mult and division cost tables. */
66 #define MODE_INDEX(mode) \
67 ((mode) == QImode ? 0 \
68 : (mode) == HImode ? 1 \
69 : (mode) == SImode ? 2 \
70 : (mode) == DImode ? 3 \
71 : 4)
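     /* For example, MODE_INDEX (SImode) evaluates to 2, so an SImode multiply
        or divide is costed from entry 2 of the per-mode arrays in the cost
        tables below (roughly: cost->mult_init[MODE_INDEX (mode)] and
        cost->divide[MODE_INDEX (mode)]).  */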
72
73 /* Processor costs (relative to an add) */
74 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
75 #define COSTS_N_BYTES(N) ((N) * 2)
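     /* Under that assumption, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1):
        the two-byte add used as the size baseline lands on the same relative
        scale as a single instruction in the speed-tuned tables below.  */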
76
77 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
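     /* Each memcpy/memset descriptor below is a pair of stringop_algs
        entries: the first is used when tuning 32-bit code, the second when
        tuning 64-bit code (compare generic32_cost and generic64_cost below);
        DUMMY_STRINGOP_ALGS fills whichever half a given tuning never uses.
        Within an entry, the leading algorithm handles blocks of unknown
        size, and each {max, alg} pair selects ALG for blocks of up to MAX
        bytes, with -1 meaning no upper bound.  */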
78
79 const
80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
98 0, /* "large" insn */
99 2, /* MOVE_RATIO */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
125 2, /* Branch cost */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
147 };
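     /* Unlike the per-CPU tables that follow, the table above is expressed in
        code bytes rather than cycles and is the one consulted when optimizing
        for size instead of for a particular -mtune target.  */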
148
149 /* Processor costs (relative to an add) */
150 static const
151 struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
170 3, /* MOVE_RATIO */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
196 1, /* Branch cost */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
218 };
219
220 static const
221 struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
240 3, /* MOVE_RATIO */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
268 1, /* Branch cost */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
290 };
291
292 static const
293 struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
312 6, /* MOVE_RATIO */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
338 2, /* Branch cost */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
360 };
361
362 static const
363 struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
382 6, /* MOVE_RATIO */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
408 2, /* Branch cost */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
 415 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
 416    (we ensure the alignment).  For small blocks an inline loop is still a
 417    noticeable win; for bigger blocks either rep movsl or rep movsb is the
 418    way to go.  Rep movsb apparently has a more expensive startup time in the
 419    CPU, but after 4K the difference is down in the noise.  */
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
437 };
438
439 static const
440 struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
459 4, /* MOVE_RATIO */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
470
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
486 1, /* Branch cost */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
508 };
509
510 static const
511 struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
530 4, /* MOVE_RATIO */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
559 1, /* Branch cost */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
581 };
582
583 static const
584 struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
603 9, /* MOVE_RATIO */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
629 5, /* Branch cost */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 636 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
 637    than K8 does.  Alignment becomes important after 8 bytes for memcpy and
 638    128 bytes for memset.  */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
654 };
655
656 static const
657 struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
676 9, /* MOVE_RATIO */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
 701 /* New AMD processors never drop prefetches; if they cannot be performed
 702    immediately, they are queued.  We set the number of simultaneous prefetches
 703    to a large constant to reflect this (it is probably not a good idea to
 704    leave the number of prefetches completely unlimited, as their execution
 705    also takes some time). */
706 100, /* number of parallel prefetches */
707 3, /* Branch cost */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 714 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
 715    small blocks it is better to use a loop.  For large blocks, a libcall can
 716    do nontemporal accesses and beat inline code considerably.  */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
733 };
734
735 struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
754 9, /* MOVE_RATIO */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
776 /* On K8:
777 MOVD reg64, xmmreg Double FSTORE 4
778 MOVD reg32, xmmreg Double FSTORE 4
779 On AMDFAM10:
780 MOVD reg64, xmmreg Double FADD 3
781 1/1 1/1
782 MOVD reg32, xmmreg Double FADD 3
783 1/1 1/1 */
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
 787 /* New AMD processors never drop prefetches; if they cannot be performed
 788    immediately, they are queued.  We set the number of simultaneous prefetches
 789    to a large constant to reflect this (it is probably not a good idea to
 790    leave the number of prefetches completely unlimited, as their execution
 791    also takes some time). */
792 100, /* number of parallel prefetches */
793 2, /* Branch cost */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800
 801 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
 802    very small blocks it is better to use a loop.  For large blocks, a libcall
 803    can do nontemporal accesses and beat inline code considerably.  */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
820 };
821
822 struct processor_costs bdver1_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (2), /* cost of a lea instruction */
825 COSTS_N_INSNS (1), /* variable shift costs */
826 COSTS_N_INSNS (1), /* constant shift costs */
827 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (4), /* HI */
829 COSTS_N_INSNS (3), /* SI */
830 COSTS_N_INSNS (4), /* DI */
831 COSTS_N_INSNS (5)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (35), /* HI */
835 COSTS_N_INSNS (51), /* SI */
836 COSTS_N_INSNS (83), /* DI */
837 COSTS_N_INSNS (83)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 8, /* "large" insn */
841 9, /* MOVE_RATIO */
842 4, /* cost for loading QImode using movzbl */
843 {3, 4, 3}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {3, 4, 3}, /* cost of storing integer registers */
847 4, /* cost of reg,reg fld/fst */
848 {4, 4, 12}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {6, 6, 8}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {3, 3}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {4, 4}, /* cost of storing MMX registers
856 in SImode and DImode */
857 2, /* cost of moving SSE register */
858 {4, 4, 3}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {4, 4, 5}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 3, /* MMX or SSE register to integer */
863 /* On K8:
864 MOVD reg64, xmmreg Double FSTORE 4
865 MOVD reg32, xmmreg Double FSTORE 4
866 On AMDFAM10:
867 MOVD reg64, xmmreg Double FADD 3
868 1/1 1/1
869 MOVD reg32, xmmreg Double FADD 3
870 1/1 1/1 */
871 64, /* size of l1 cache. */
872 1024, /* size of l2 cache. */
873 64, /* size of prefetch block */
 874 /* New AMD processors never drop prefetches; if they cannot be performed
 875    immediately, they are queued.  We set the number of simultaneous prefetches
 876    to a large constant to reflect this (it is probably not a good idea to
 877    leave the number of prefetches completely unlimited, as their execution
 878    also takes some time). */
879 100, /* number of parallel prefetches */
880 2, /* Branch cost */
881 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
882 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
883 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
884 COSTS_N_INSNS (2), /* cost of FABS instruction. */
885 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
886 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
887
 888 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
 889    very small blocks it is better to use a loop.  For large blocks, a libcall
 890    can do nontemporal accesses and beat inline code considerably.  */
891 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
892 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
893 {{libcall, {{8, loop}, {24, unrolled_loop},
894 {2048, rep_prefix_4_byte}, {-1, libcall}}},
895 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
896 4, /* scalar_stmt_cost. */
897 2, /* scalar load_cost. */
898 2, /* scalar_store_cost. */
899 6, /* vec_stmt_cost. */
900 0, /* vec_to_scalar_cost. */
901 2, /* scalar_to_vec_cost. */
902 2, /* vec_align_load_cost. */
903 2, /* vec_unalign_load_cost. */
904 2, /* vec_store_cost. */
905 2, /* cond_taken_branch_cost. */
906 1, /* cond_not_taken_branch_cost. */
907 };
908
909 static const
910 struct processor_costs pentium4_cost = {
911 COSTS_N_INSNS (1), /* cost of an add instruction */
912 COSTS_N_INSNS (3), /* cost of a lea instruction */
913 COSTS_N_INSNS (4), /* variable shift costs */
914 COSTS_N_INSNS (4), /* constant shift costs */
915 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
916 COSTS_N_INSNS (15), /* HI */
917 COSTS_N_INSNS (15), /* SI */
918 COSTS_N_INSNS (15), /* DI */
919 COSTS_N_INSNS (15)}, /* other */
920 0, /* cost of multiply per each bit set */
921 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
922 COSTS_N_INSNS (56), /* HI */
923 COSTS_N_INSNS (56), /* SI */
924 COSTS_N_INSNS (56), /* DI */
925 COSTS_N_INSNS (56)}, /* other */
926 COSTS_N_INSNS (1), /* cost of movsx */
927 COSTS_N_INSNS (1), /* cost of movzx */
928 16, /* "large" insn */
929 6, /* MOVE_RATIO */
930 2, /* cost for loading QImode using movzbl */
931 {4, 5, 4}, /* cost of loading integer registers
932 in QImode, HImode and SImode.
933 Relative to reg-reg move (2). */
934 {2, 3, 2}, /* cost of storing integer registers */
935 2, /* cost of reg,reg fld/fst */
936 {2, 2, 6}, /* cost of loading fp registers
937 in SFmode, DFmode and XFmode */
938 {4, 4, 6}, /* cost of storing fp registers
939 in SFmode, DFmode and XFmode */
940 2, /* cost of moving MMX register */
941 {2, 2}, /* cost of loading MMX registers
942 in SImode and DImode */
943 {2, 2}, /* cost of storing MMX registers
944 in SImode and DImode */
945 12, /* cost of moving SSE register */
946 {12, 12, 12}, /* cost of loading SSE registers
947 in SImode, DImode and TImode */
948 {2, 2, 8}, /* cost of storing SSE registers
949 in SImode, DImode and TImode */
950 10, /* MMX or SSE register to integer */
951 8, /* size of l1 cache. */
952 256, /* size of l2 cache. */
953 64, /* size of prefetch block */
954 6, /* number of parallel prefetches */
955 2, /* Branch cost */
956 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
957 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
958 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
959 COSTS_N_INSNS (2), /* cost of FABS instruction. */
960 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
961 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
962 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
963 DUMMY_STRINGOP_ALGS},
964 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
965 {-1, libcall}}},
966 DUMMY_STRINGOP_ALGS},
967 1, /* scalar_stmt_cost. */
968 1, /* scalar load_cost. */
969 1, /* scalar_store_cost. */
970 1, /* vec_stmt_cost. */
971 1, /* vec_to_scalar_cost. */
972 1, /* scalar_to_vec_cost. */
973 1, /* vec_align_load_cost. */
974 2, /* vec_unalign_load_cost. */
975 1, /* vec_store_cost. */
976 3, /* cond_taken_branch_cost. */
977 1, /* cond_not_taken_branch_cost. */
978 };
979
980 static const
981 struct processor_costs nocona_cost = {
982 COSTS_N_INSNS (1), /* cost of an add instruction */
983 COSTS_N_INSNS (1), /* cost of a lea instruction */
984 COSTS_N_INSNS (1), /* variable shift costs */
985 COSTS_N_INSNS (1), /* constant shift costs */
986 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
987 COSTS_N_INSNS (10), /* HI */
988 COSTS_N_INSNS (10), /* SI */
989 COSTS_N_INSNS (10), /* DI */
990 COSTS_N_INSNS (10)}, /* other */
991 0, /* cost of multiply per each bit set */
992 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
993 COSTS_N_INSNS (66), /* HI */
994 COSTS_N_INSNS (66), /* SI */
995 COSTS_N_INSNS (66), /* DI */
996 COSTS_N_INSNS (66)}, /* other */
997 COSTS_N_INSNS (1), /* cost of movsx */
998 COSTS_N_INSNS (1), /* cost of movzx */
999 16, /* "large" insn */
1000 17, /* MOVE_RATIO */
1001 4, /* cost for loading QImode using movzbl */
1002 {4, 4, 4}, /* cost of loading integer registers
1003 in QImode, HImode and SImode.
1004 Relative to reg-reg move (2). */
1005 {4, 4, 4}, /* cost of storing integer registers */
1006 3, /* cost of reg,reg fld/fst */
1007 {12, 12, 12}, /* cost of loading fp registers
1008 in SFmode, DFmode and XFmode */
1009 {4, 4, 4}, /* cost of storing fp registers
1010 in SFmode, DFmode and XFmode */
1011 6, /* cost of moving MMX register */
1012 {12, 12}, /* cost of loading MMX registers
1013 in SImode and DImode */
1014 {12, 12}, /* cost of storing MMX registers
1015 in SImode and DImode */
1016 6, /* cost of moving SSE register */
1017 {12, 12, 12}, /* cost of loading SSE registers
1018 in SImode, DImode and TImode */
1019 {12, 12, 12}, /* cost of storing SSE registers
1020 in SImode, DImode and TImode */
1021 8, /* MMX or SSE register to integer */
1022 8, /* size of l1 cache. */
1023 1024, /* size of l2 cache. */
1024 128, /* size of prefetch block */
1025 8, /* number of parallel prefetches */
1026 1, /* Branch cost */
1027 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1028 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1029 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1030 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1031 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1032 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1033 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
1034 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
1035 {100000, unrolled_loop}, {-1, libcall}}}},
1036 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
1037 {-1, libcall}}},
1038 {libcall, {{24, loop}, {64, unrolled_loop},
1039 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1040 1, /* scalar_stmt_cost. */
1041 1, /* scalar load_cost. */
1042 1, /* scalar_store_cost. */
1043 1, /* vec_stmt_cost. */
1044 1, /* vec_to_scalar_cost. */
1045 1, /* scalar_to_vec_cost. */
1046 1, /* vec_align_load_cost. */
1047 2, /* vec_unalign_load_cost. */
1048 1, /* vec_store_cost. */
1049 3, /* cond_taken_branch_cost. */
1050 1, /* cond_not_taken_branch_cost. */
1051 };
1052
1053 static const
1054 struct processor_costs core2_cost = {
1055 COSTS_N_INSNS (1), /* cost of an add instruction */
1056 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1057 COSTS_N_INSNS (1), /* variable shift costs */
1058 COSTS_N_INSNS (1), /* constant shift costs */
1059 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1060 COSTS_N_INSNS (3), /* HI */
1061 COSTS_N_INSNS (3), /* SI */
1062 COSTS_N_INSNS (3), /* DI */
1063 COSTS_N_INSNS (3)}, /* other */
1064 0, /* cost of multiply per each bit set */
1065 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
1066 COSTS_N_INSNS (22), /* HI */
1067 COSTS_N_INSNS (22), /* SI */
1068 COSTS_N_INSNS (22), /* DI */
1069 COSTS_N_INSNS (22)}, /* other */
1070 COSTS_N_INSNS (1), /* cost of movsx */
1071 COSTS_N_INSNS (1), /* cost of movzx */
1072 8, /* "large" insn */
1073 16, /* MOVE_RATIO */
1074 2, /* cost for loading QImode using movzbl */
1075 {6, 6, 6}, /* cost of loading integer registers
1076 in QImode, HImode and SImode.
1077 Relative to reg-reg move (2). */
1078 {4, 4, 4}, /* cost of storing integer registers */
1079 2, /* cost of reg,reg fld/fst */
1080 {6, 6, 6}, /* cost of loading fp registers
1081 in SFmode, DFmode and XFmode */
1082 {4, 4, 4}, /* cost of storing fp registers
1083 in SFmode, DFmode and XFmode */
1084 2, /* cost of moving MMX register */
1085 {6, 6}, /* cost of loading MMX registers
1086 in SImode and DImode */
1087 {4, 4}, /* cost of storing MMX registers
1088 in SImode and DImode */
1089 2, /* cost of moving SSE register */
1090 {6, 6, 6}, /* cost of loading SSE registers
1091 in SImode, DImode and TImode */
1092 {4, 4, 4}, /* cost of storing SSE registers
1093 in SImode, DImode and TImode */
1094 2, /* MMX or SSE register to integer */
1095 32, /* size of l1 cache. */
1096 2048, /* size of l2 cache. */
1097 128, /* size of prefetch block */
1098 8, /* number of parallel prefetches */
1099 3, /* Branch cost */
1100 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1101 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1102 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1103 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1104 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1105 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1106 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1107 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1108 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1109 {{libcall, {{8, loop}, {15, unrolled_loop},
1110 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1111 {libcall, {{24, loop}, {32, unrolled_loop},
1112 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1113 1, /* scalar_stmt_cost. */
1114 1, /* scalar load_cost. */
1115 1, /* scalar_store_cost. */
1116 1, /* vec_stmt_cost. */
1117 1, /* vec_to_scalar_cost. */
1118 1, /* scalar_to_vec_cost. */
1119 1, /* vec_align_load_cost. */
1120 2, /* vec_unalign_load_cost. */
1121 1, /* vec_store_cost. */
1122 3, /* cond_taken_branch_cost. */
1123 1, /* cond_not_taken_branch_cost. */
1124 };
1125
1126 static const
1127 struct processor_costs atom_cost = {
1128 COSTS_N_INSNS (1), /* cost of an add instruction */
1129 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1130 COSTS_N_INSNS (1), /* variable shift costs */
1131 COSTS_N_INSNS (1), /* constant shift costs */
1132 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1133 COSTS_N_INSNS (4), /* HI */
1134 COSTS_N_INSNS (3), /* SI */
1135 COSTS_N_INSNS (4), /* DI */
1136 COSTS_N_INSNS (2)}, /* other */
1137 0, /* cost of multiply per each bit set */
1138 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1139 COSTS_N_INSNS (26), /* HI */
1140 COSTS_N_INSNS (42), /* SI */
1141 COSTS_N_INSNS (74), /* DI */
1142 COSTS_N_INSNS (74)}, /* other */
1143 COSTS_N_INSNS (1), /* cost of movsx */
1144 COSTS_N_INSNS (1), /* cost of movzx */
1145 8, /* "large" insn */
1146 17, /* MOVE_RATIO */
1147 2, /* cost for loading QImode using movzbl */
1148 {4, 4, 4}, /* cost of loading integer registers
1149 in QImode, HImode and SImode.
1150 Relative to reg-reg move (2). */
1151 {4, 4, 4}, /* cost of storing integer registers */
1152 4, /* cost of reg,reg fld/fst */
1153 {12, 12, 12}, /* cost of loading fp registers
1154 in SFmode, DFmode and XFmode */
1155 {6, 6, 8}, /* cost of storing fp registers
1156 in SFmode, DFmode and XFmode */
1157 2, /* cost of moving MMX register */
1158 {8, 8}, /* cost of loading MMX registers
1159 in SImode and DImode */
1160 {8, 8}, /* cost of storing MMX registers
1161 in SImode and DImode */
1162 2, /* cost of moving SSE register */
1163 {8, 8, 8}, /* cost of loading SSE registers
1164 in SImode, DImode and TImode */
1165 {8, 8, 8}, /* cost of storing SSE registers
1166 in SImode, DImode and TImode */
1167 5, /* MMX or SSE register to integer */
1168 32, /* size of l1 cache. */
1169 256, /* size of l2 cache. */
1170 64, /* size of prefetch block */
1171 6, /* number of parallel prefetches */
1172 3, /* Branch cost */
1173 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1174 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1175 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1176 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1177 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1178 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1179 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1180 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1181 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1182 {{libcall, {{8, loop}, {15, unrolled_loop},
1183 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1184 {libcall, {{24, loop}, {32, unrolled_loop},
1185 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1186 1, /* scalar_stmt_cost. */
1187 1, /* scalar load_cost. */
1188 1, /* scalar_store_cost. */
1189 1, /* vec_stmt_cost. */
1190 1, /* vec_to_scalar_cost. */
1191 1, /* scalar_to_vec_cost. */
1192 1, /* vec_align_load_cost. */
1193 2, /* vec_unalign_load_cost. */
1194 1, /* vec_store_cost. */
1195 3, /* cond_taken_branch_cost. */
1196 1, /* cond_not_taken_branch_cost. */
1197 };
1198
1199 /* Generic64 should produce code tuned for Nocona and K8. */
1200 static const
1201 struct processor_costs generic64_cost = {
1202 COSTS_N_INSNS (1), /* cost of an add instruction */
1203 /* On all chips taken into consideration, lea takes 2 cycles or more.  With
1204    that cost, however, our current implementation of synth_mult results in
1205    the use of unnecessary temporary registers, causing regressions on several
1206    SPECfp benchmarks.  */
1207 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1208 COSTS_N_INSNS (1), /* variable shift costs */
1209 COSTS_N_INSNS (1), /* constant shift costs */
1210 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1211 COSTS_N_INSNS (4), /* HI */
1212 COSTS_N_INSNS (3), /* SI */
1213 COSTS_N_INSNS (4), /* DI */
1214 COSTS_N_INSNS (2)}, /* other */
1215 0, /* cost of multiply per each bit set */
1216 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1217 COSTS_N_INSNS (26), /* HI */
1218 COSTS_N_INSNS (42), /* SI */
1219 COSTS_N_INSNS (74), /* DI */
1220 COSTS_N_INSNS (74)}, /* other */
1221 COSTS_N_INSNS (1), /* cost of movsx */
1222 COSTS_N_INSNS (1), /* cost of movzx */
1223 8, /* "large" insn */
1224 17, /* MOVE_RATIO */
1225 4, /* cost for loading QImode using movzbl */
1226 {4, 4, 4}, /* cost of loading integer registers
1227 in QImode, HImode and SImode.
1228 Relative to reg-reg move (2). */
1229 {4, 4, 4}, /* cost of storing integer registers */
1230 4, /* cost of reg,reg fld/fst */
1231 {12, 12, 12}, /* cost of loading fp registers
1232 in SFmode, DFmode and XFmode */
1233 {6, 6, 8}, /* cost of storing fp registers
1234 in SFmode, DFmode and XFmode */
1235 2, /* cost of moving MMX register */
1236 {8, 8}, /* cost of loading MMX registers
1237 in SImode and DImode */
1238 {8, 8}, /* cost of storing MMX registers
1239 in SImode and DImode */
1240 2, /* cost of moving SSE register */
1241 {8, 8, 8}, /* cost of loading SSE registers
1242 in SImode, DImode and TImode */
1243 {8, 8, 8}, /* cost of storing SSE registers
1244 in SImode, DImode and TImode */
1245 5, /* MMX or SSE register to integer */
1246 32, /* size of l1 cache. */
1247 512, /* size of l2 cache. */
1248 64, /* size of prefetch block */
1249 6, /* number of parallel prefetches */
1250 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1251    value is increased to the perhaps more appropriate value of 5.  */
1252 3, /* Branch cost */
1253 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1254 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1255 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1256 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1257 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1258 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1259 {DUMMY_STRINGOP_ALGS,
1260 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1261 {DUMMY_STRINGOP_ALGS,
1262 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1263 1, /* scalar_stmt_cost. */
1264 1, /* scalar load_cost. */
1265 1, /* scalar_store_cost. */
1266 1, /* vec_stmt_cost. */
1267 1, /* vec_to_scalar_cost. */
1268 1, /* scalar_to_vec_cost. */
1269 1, /* vec_align_load_cost. */
1270 2, /* vec_unalign_load_cost. */
1271 1, /* vec_store_cost. */
1272 3, /* cond_taken_branch_cost. */
1273 1, /* cond_not_taken_branch_cost. */
1274 };
1275
1276 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1277 Athlon and K8. */
1278 static const
1279 struct processor_costs generic32_cost = {
1280 COSTS_N_INSNS (1), /* cost of an add instruction */
1281 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1282 COSTS_N_INSNS (1), /* variable shift costs */
1283 COSTS_N_INSNS (1), /* constant shift costs */
1284 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1285 COSTS_N_INSNS (4), /* HI */
1286 COSTS_N_INSNS (3), /* SI */
1287 COSTS_N_INSNS (4), /* DI */
1288 COSTS_N_INSNS (2)}, /* other */
1289 0, /* cost of multiply per each bit set */
1290 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1291 COSTS_N_INSNS (26), /* HI */
1292 COSTS_N_INSNS (42), /* SI */
1293 COSTS_N_INSNS (74), /* DI */
1294 COSTS_N_INSNS (74)}, /* other */
1295 COSTS_N_INSNS (1), /* cost of movsx */
1296 COSTS_N_INSNS (1), /* cost of movzx */
1297 8, /* "large" insn */
1298 17, /* MOVE_RATIO */
1299 4, /* cost for loading QImode using movzbl */
1300 {4, 4, 4}, /* cost of loading integer registers
1301 in QImode, HImode and SImode.
1302 Relative to reg-reg move (2). */
1303 {4, 4, 4}, /* cost of storing integer registers */
1304 4, /* cost of reg,reg fld/fst */
1305 {12, 12, 12}, /* cost of loading fp registers
1306 in SFmode, DFmode and XFmode */
1307 {6, 6, 8}, /* cost of storing fp registers
1308 in SFmode, DFmode and XFmode */
1309 2, /* cost of moving MMX register */
1310 {8, 8}, /* cost of loading MMX registers
1311 in SImode and DImode */
1312 {8, 8}, /* cost of storing MMX registers
1313 in SImode and DImode */
1314 2, /* cost of moving SSE register */
1315 {8, 8, 8}, /* cost of loading SSE registers
1316 in SImode, DImode and TImode */
1317 {8, 8, 8}, /* cost of storing SSE registers
1318 in SImode, DImode and TImode */
1319 5, /* MMX or SSE register to integer */
1320 32, /* size of l1 cache. */
1321 256, /* size of l2 cache. */
1322 64, /* size of prefetch block */
1323 6, /* number of parallel prefetches */
1324 3, /* Branch cost */
1325 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1326 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1327 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1328 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1329 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1330 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1331 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1332 DUMMY_STRINGOP_ALGS},
1333 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1334 DUMMY_STRINGOP_ALGS},
1335 1, /* scalar_stmt_cost. */
1336 1, /* scalar load_cost. */
1337 1, /* scalar_store_cost. */
1338 1, /* vec_stmt_cost. */
1339 1, /* vec_to_scalar_cost. */
1340 1, /* scalar_to_vec_cost. */
1341 1, /* vec_align_load_cost. */
1342 2, /* vec_unalign_load_cost. */
1343 1, /* vec_store_cost. */
1344 3, /* cond_taken_branch_cost. */
1345 1, /* cond_not_taken_branch_cost. */
1346 };
1347
1348 const struct processor_costs *ix86_cost = &pentium_cost;
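/* ix86_cost starts out pointing at pentium_cost only as a fallback; option
   processing re-points it at the cost table that matches the selected
   -mtune (or -march) processor.  */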
1349
1350 /* Processor feature/optimization bitmasks. */
1351 #define m_386 (1<<PROCESSOR_I386)
1352 #define m_486 (1<<PROCESSOR_I486)
1353 #define m_PENT (1<<PROCESSOR_PENTIUM)
1354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1355 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1356 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1357 #define m_CORE2 (1<<PROCESSOR_CORE2)
1358 #define m_ATOM (1<<PROCESSOR_ATOM)
1359
1360 #define m_GEODE (1<<PROCESSOR_GEODE)
1361 #define m_K6 (1<<PROCESSOR_K6)
1362 #define m_K6_GEODE (m_K6 | m_GEODE)
1363 #define m_K8 (1<<PROCESSOR_K8)
1364 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1365 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1366 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1367 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1368 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)
1369
1370 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1371 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1372
1373 /* Generic instruction choice should be a common subset of the supported CPUs
1374    (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1375 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1376
1377 /* Feature tests against the various tunings. */
1378 unsigned char ix86_tune_features[X86_TUNE_LAST];
1379
1380 /* Feature tests against the various tunings used to create ix86_tune_features
1381 based on the processor mask. */
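/* Roughly: each entry below is a mask built from the m_* bits above, and
   ix86_tune_features[I] is set at option-override time when entry I has the
   (1 << ix86_tune) bit for the active tuning target.  */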
1382 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1383 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1384    negatively, so enabling it for Generic64 seems like a good code size
1385    tradeoff.  We can't enable it for 32bit generic because it does not
1386    work well with PPro based chips.  */
1387 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1388
1389 /* X86_TUNE_PUSH_MEMORY */
1390 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1391 | m_NOCONA | m_CORE2 | m_GENERIC,
1392
1393 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1394 m_486 | m_PENT,
1395
1396 /* X86_TUNE_UNROLL_STRLEN */
1397 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1398 | m_CORE2 | m_GENERIC,
1399
1400 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1401 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1402
1403 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put into P4 based
1404    on simulation results, but after P4 was made, no performance benefit
1405    was observed from them.  They also increase code size.
1406    As a result, icc never generates branch hints.  */
1407 0,
1408
1409 /* X86_TUNE_DOUBLE_WITH_ADD */
1410 ~m_386,
1411
1412 /* X86_TUNE_USE_SAHF */
1413 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1414 | m_NOCONA | m_CORE2 | m_GENERIC,
1415
1416 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1417 partial dependencies. */
1418 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1419 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1420
  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However,
     in the current implementation the partial register stalls are not
     eliminated very well - they can be introduced via subregs synthesized
     by combine and can happen in caller/callee saving sequences.  Because
     this option pays back little on PPro based chips and conflicts with the
     partial register dependencies used by Athlon/P4 based chips, it is
     better to leave it off for generic32 for now.  */
1429 m_PPRO,
1430
1431 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1432 m_CORE2 | m_GENERIC,
1433
1434 /* X86_TUNE_USE_HIMODE_FIOP */
1435 m_386 | m_486 | m_K6_GEODE,
1436
1437 /* X86_TUNE_USE_SIMODE_FIOP */
1438 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1439
1440 /* X86_TUNE_USE_MOV0 */
1441 m_K6,
1442
1443 /* X86_TUNE_USE_CLTD */
1444 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1445
1446 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1447 m_PENT4,
1448
1449 /* X86_TUNE_SPLIT_LONG_MOVES */
1450 m_PPRO,
1451
1452 /* X86_TUNE_READ_MODIFY_WRITE */
1453 ~m_PENT,
1454
1455 /* X86_TUNE_READ_MODIFY */
1456 ~(m_PENT | m_PPRO),
1457
1458 /* X86_TUNE_PROMOTE_QIMODE */
1459 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1460 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1461
1462 /* X86_TUNE_FAST_PREFIX */
1463 ~(m_PENT | m_486 | m_386),
1464
1465 /* X86_TUNE_SINGLE_STRINGOP */
1466 m_386 | m_PENT4 | m_NOCONA,
1467
1468 /* X86_TUNE_QIMODE_MATH */
1469 ~0,
1470
  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls were more effective.  */
1475 ~m_PPRO,
1476
1477 /* X86_TUNE_PROMOTE_QI_REGS */
1478 0,
1479
1480 /* X86_TUNE_PROMOTE_HI_REGS */
1481 m_PPRO,
1482
1483 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1484 over esp addition. */
1485 m_386 | m_486 | m_PENT | m_PPRO,
1486
1487 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1488 over esp addition. */
1489 m_PENT,
1490
1491 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1492 over esp subtraction. */
1493 m_386 | m_486 | m_PENT | m_K6_GEODE,
1494
1495 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1496 over esp subtraction. */
1497 m_PENT | m_K6_GEODE,
1498
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1503
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1506
  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here between PPro/Pentium4 based chips that treat 128-bit
     SSE registers as single units and K8 based chips that divide SSE
     registers into two 64-bit halves.  This knob promotes all store
     destinations to 128 bits to allow register renaming on 128-bit SSE
     units, but usually results in one extra microop on 64-bit SSE units.
     Experimental results show that disabling this option on P4 brings
     over a 20% SPECfp regression, while enabling it on K8 brings a
     roughly 2.4% regression that can be partly masked by careful
     scheduling of moves.  */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
1518
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1521
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1523 m_BDVER1,
1524
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1526 m_BDVER1,
1527
  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and
     dependencies are resolved on SSE register parts instead of whole
     registers, so we may maintain just the lower part of scalar values in
     the proper format, leaving the upper part undefined.  */
1532 m_ATHLON_K8,
1533
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1535 m_AMD_MULTIPLE,
1536
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1539
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1542
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1545
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1548
1549 /* X86_TUNE_SHIFT1 */
1550 ~m_486,
1551
1552 /* X86_TUNE_USE_FFREEP */
1553 m_AMD_MULTIPLE,
1554
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1557
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
1560
  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in a 16-byte window.  */
1563 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1564 | m_GENERIC,
1565
1566 /* X86_TUNE_SCHEDULE */
1567 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1568 | m_GENERIC,
1569
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1572
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1575
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1578
  /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions.  */
1580 m_ATOM,
1581
1582 /* X86_TUNE_EXT_80387_CONSTANTS */
1583 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1584 | m_CORE2 | m_GENERIC,
1585
1586 /* X86_TUNE_SHORTEN_X87_SSE */
1587 ~m_K8,
1588
1589 /* X86_TUNE_AVOID_VECTOR_DECODE */
1590 m_K8 | m_GENERIC64,
1591
  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
     HImode and SImode multiply, but 386 and 486 do HImode multiply
     faster.  */
1594 ~(m_386 | m_486),
1595
1596 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1597 vector path on AMD machines. */
1598 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1599
1600 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1601 machines. */
1602 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1603
1604 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1605 than a MOV. */
1606 m_PENT,
1607
1608 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1609 but one byte longer. */
1610 m_PENT,
1611
  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a
     memory operand that cannot be represented using a modRM byte.  The
     XOR replacement is long decoded, so this split helps here as well.  */
1615 m_K6,
1616
1617 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1618 from FP to FP. */
1619 m_AMDFAM10 | m_GENERIC,
1620
1621 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1622 from integer to FP. */
1623 m_AMDFAM10,
1624
1625 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1626 with a subsequent conditional jump instruction into a single
1627 compare-and-branch uop. */
1628 m_CORE2 | m_BDVER1,
1629
1630 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1631 will impact LEA instruction selection. */
1632 m_ATOM,
1633
1634 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
1635 instructions. */
1636 ~m_ATOM,
1637 };
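/* A sketch (not necessarily the exact code) of how the table above is
   consumed: later in this file, ix86_option_override_internal expands the
   per-processor bitmasks into the ix86_tune_features array, roughly as

       ix86_tune_mask = 1u << ix86_tune;
       for (i = 0; i < X86_TUNE_LAST; ++i)
         ix86_tune_features[i]
           = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   so every X86_TUNE_* entry ends up as a plain 0/1 flag for the CPU
   selected by -mtune.  */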
1638
1639 /* Feature tests against the various architecture variations. */
1640 unsigned char ix86_arch_features[X86_ARCH_LAST];
1641
1642 /* Feature tests against the various architecture variations, used to create
1643 ix86_arch_features based on the processor mask. */
1644 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1645 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1646 ~(m_386 | m_486 | m_PENT | m_K6),
1647
1648 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1649 ~m_386,
1650
1651 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1652 ~(m_386 | m_486),
1653
1654 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1655 ~m_386,
1656
1657 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1658 ~m_386,
1659 };
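/* ix86_arch_features is filled in the same way as ix86_tune_features,
   only using ix86_arch_mask = 1u << ix86_arch, so each X86_ARCH_* entry
   becomes a 0/1 flag for the CPU selected by -march.  */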
1660
1661 static const unsigned int x86_accumulate_outgoing_args
1662 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1663 | m_GENERIC;
1664
1665 static const unsigned int x86_arch_always_fancy_math_387
1666 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1667 | m_NOCONA | m_CORE2 | m_GENERIC;
1668
1669 static enum stringop_alg stringop_alg = no_stringop;
1670
/* In case the average insn count for a single function invocation is
   lower than this constant, emit a fast (but longer) prologue and
   epilogue.  */
1674 #define FAST_PROLOGUE_INSN_COUNT 20
1675
/* Names for the 8-bit (low), 8-bit (high), and 16-bit registers,
   respectively.  */
1677 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1678 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1679 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1680
1681 /* Array of the smallest class containing reg number REGNO, indexed by
1682 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1683
1684 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1685 {
1686 /* ax, dx, cx, bx */
1687 AREG, DREG, CREG, BREG,
1688 /* si, di, bp, sp */
1689 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1690 /* FP registers */
1691 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1692 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1693 /* arg pointer */
1694 NON_Q_REGS,
1695 /* flags, fpsr, fpcr, frame */
1696 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1697 /* SSE registers */
1698 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1699 SSE_REGS, SSE_REGS,
1700 /* MMX registers */
1701 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1702 MMX_REGS, MMX_REGS,
1703 /* REX registers */
1704 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1705 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1706 /* SSE REX registers */
1707 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1708 SSE_REGS, SSE_REGS,
1709 };
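/* Illustration (macro name assumed from i386.h): REGNO_REG_CLASS is a
   direct lookup into this table, roughly

       #define REGNO_REG_CLASS(REGNO)  (regclass_map[(REGNO)])

   so e.g. the accumulator maps to AREG and the SSE registers map to
   SSE_REGS (with xmm0 in SSE_FIRST_REG).  */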
1710
1711 /* The "default" register map used in 32bit mode. */
1712
1713 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1714 {
1715 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1716 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1717 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1718 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1719 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1720 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1721 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1722 };
1723
1724 /* The "default" register map used in 64bit mode. */
1725
1726 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1727 {
1728 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1729 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1730 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1731 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1732 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1733 8,9,10,11,12,13,14,15, /* extended integer registers */
1734 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1735 };
1736
1737 /* Define the register numbers to be used in Dwarf debugging information.
1738 The SVR4 reference port C compiler uses the following register numbers
1739 in its Dwarf output code:
1740 0 for %eax (gcc regno = 0)
1741 1 for %ecx (gcc regno = 2)
1742 2 for %edx (gcc regno = 1)
1743 3 for %ebx (gcc regno = 3)
1744 4 for %esp (gcc regno = 7)
1745 5 for %ebp (gcc regno = 6)
1746 6 for %esi (gcc regno = 4)
1747 7 for %edi (gcc regno = 5)
1748 The following three DWARF register numbers are never generated by
1749 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1750 believes these numbers have these meanings.
1751 8 for %eip (no gcc equivalent)
1752 9 for %eflags (gcc regno = 17)
1753 10 for %trapno (no gcc equivalent)
1754 It is not at all clear how we should number the FP stack registers
1755 for the x86 architecture. If the version of SDB on x86/svr4 were
1756 a bit less brain dead with respect to floating-point then we would
1757 have a precedent to follow with respect to DWARF register numbers
1758 for x86 FP registers, but the SDB on x86/svr4 is so completely
1759 broken with respect to FP registers that it is hardly worth thinking
1760 of it as something to strive for compatibility with.
1761 The version of x86/svr4 SDB I have at the moment does (partially)
1762 seem to believe that DWARF register number 11 is associated with
1763 the x86 register %st(0), but that's about all. Higher DWARF
1764 register numbers don't seem to be associated with anything in
1765 particular, and even for DWARF regno 11, SDB only seems to under-
1766 stand that it should say that a variable lives in %st(0) (when
1767 asked via an `=' command) if we said it was in DWARF regno 11,
1768 but SDB still prints garbage when asked for the value of the
1769 variable in question (via a `/' command).
1770 (Also note that the labels SDB prints for various FP stack regs
1771 when doing an `x' command are all wrong.)
1772 Note that these problems generally don't affect the native SVR4
1773 C compiler because it doesn't allow the use of -O with -g and
1774 because when it is *not* optimizing, it allocates a memory
1775 location for each floating-point variable, and the memory
1776 location is what gets described in the DWARF AT_location
1777 attribute for the variable in question.
1778 Regardless of the severe mental illness of the x86/svr4 SDB, we
1779 do something sensible here and we use the following DWARF
1780 register numbers. Note that these are all stack-top-relative
1781 numbers.
1782 11 for %st(0) (gcc regno = 8)
1783 12 for %st(1) (gcc regno = 9)
1784 13 for %st(2) (gcc regno = 10)
1785 14 for %st(3) (gcc regno = 11)
1786 15 for %st(4) (gcc regno = 12)
1787 16 for %st(5) (gcc regno = 13)
1788 17 for %st(6) (gcc regno = 14)
1789 18 for %st(7) (gcc regno = 15)
1790 */
1791 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1792 {
1793 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1794 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1795 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1796 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1797 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1798 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1799 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1800 };
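/* Illustration (assumed, see the DBX_REGISTER_NUMBER definition in
   i386.h): the debug-info code picks one of the maps above, roughly

       #define DBX_REGISTER_NUMBER(N) \
         (TARGET_64BIT ? dbx64_register_map[(N)] \
                       : svr4_dbx_register_map[(N)])

   so %ebp (gcc regno 6) is emitted as DWARF register 5 on 32-bit SVR4
   targets and as register 6 in 64-bit mode, matching the numbering
   described in the comment above.  */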
1801
1802 /* Define parameter passing and return registers. */
1803
1804 static int const x86_64_int_parameter_registers[6] =
1805 {
1806 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1807 };
1808
1809 static int const x86_64_ms_abi_int_parameter_registers[4] =
1810 {
1811 CX_REG, DX_REG, R8_REG, R9_REG
1812 };
1813
1814 static int const x86_64_int_return_registers[4] =
1815 {
1816 AX_REG, DX_REG, DI_REG, SI_REG
1817 };
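/* Worked example: for a SysV call f (a, b, c) taking three integer
   arguments, a, b and c are passed in %rdi, %rsi and %rdx - the first
   three entries of x86_64_int_parameter_registers - while the same call
   under the MS ABI uses %rcx, %rdx and %r8.  Integer results come back
   in %rax (with %rdx holding the second word when needed).  */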
1818
1819 /* Define the structure for the machine field in struct function. */
1820
1821 struct GTY(()) stack_local_entry {
1822 unsigned short mode;
1823 unsigned short n;
1824 rtx rtl;
1825 struct stack_local_entry *next;
1826 };
1827
1828 /* Structure describing stack frame layout.
1829 Stack grows downward:
1830
1831 [arguments]
1832 <- ARG_POINTER
1833 saved pc
1834
1835 saved static chain if ix86_static_chain_on_stack
1836
1837 saved frame pointer if frame_pointer_needed
1838 <- HARD_FRAME_POINTER
1839 [saved regs]
1840 <- regs_save_offset
1841 [padding0]
1842
1843 [saved SSE regs]
1844 <- sse_regs_save_offset
1845 [padding1] |
1846 | <- FRAME_POINTER
1847 [va_arg registers] |
1848 |
1849 [frame] |
1850 |
1851 [padding2] | = to_allocate
1852 <- STACK_POINTER
1853 */
1854 struct ix86_frame
1855 {
1856 int nsseregs;
1857 int nregs;
1858 int va_arg_size;
1859 int red_zone_size;
1860 int outgoing_arguments_size;
1861 HOST_WIDE_INT frame;
1862
1863 /* The offsets relative to ARG_POINTER. */
1864 HOST_WIDE_INT frame_pointer_offset;
1865 HOST_WIDE_INT hard_frame_pointer_offset;
1866 HOST_WIDE_INT stack_pointer_offset;
1867 HOST_WIDE_INT reg_save_offset;
1868 HOST_WIDE_INT sse_reg_save_offset;
1869
1870 /* When save_regs_using_mov is set, emit prologue using
1871 move instead of push instructions. */
1872 bool save_regs_using_mov;
1873 };
1874
1875 /* Code model option. */
1876 enum cmodel ix86_cmodel;
1877 /* Asm dialect. */
1878 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1879 /* TLS dialects. */
1880 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1881
1882 /* Which unit we are generating floating point math for. */
1883 enum fpmath_unit ix86_fpmath;
1884
1885 /* Which cpu are we scheduling for. */
1886 enum attr_cpu ix86_schedule;
1887
1888 /* Which cpu are we optimizing for. */
1889 enum processor_type ix86_tune;
1890
1891 /* Which instruction set architecture to use. */
1892 enum processor_type ix86_arch;
1893
/* True if the SSE prefetch instruction is not a NOOP.  */
1895 int x86_prefetch_sse;
1896
1897 /* ix86_regparm_string as a number */
1898 static int ix86_regparm;
1899
1900 /* -mstackrealign option */
1901 static const char ix86_force_align_arg_pointer_string[]
1902 = "force_align_arg_pointer";
1903
1904 static rtx (*ix86_gen_leave) (void);
1905 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1906 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1907 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1908 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1909 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1910 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1911 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1912 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1913 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1914
1915 /* Preferred alignment for stack boundary in bits. */
1916 unsigned int ix86_preferred_stack_boundary;
1917
1918 /* Alignment for incoming stack boundary in bits specified at
1919 command line. */
1920 static unsigned int ix86_user_incoming_stack_boundary;
1921
1922 /* Default alignment for incoming stack boundary in bits. */
1923 static unsigned int ix86_default_incoming_stack_boundary;
1924
1925 /* Alignment for incoming stack boundary in bits. */
1926 unsigned int ix86_incoming_stack_boundary;
1927
/* The ABI used by the target.  */
1929 enum calling_abi ix86_abi;
1930
1931 /* Values 1-5: see jump.c */
1932 int ix86_branch_cost;
1933
1934 /* Calling abi specific va_list type nodes. */
1935 static GTY(()) tree sysv_va_list_type_node;
1936 static GTY(()) tree ms_va_list_type_node;
1937
1938 /* Variables which are this size or smaller are put in the data/bss
1939 or ldata/lbss sections. */
1940
1941 int ix86_section_threshold = 65536;
1942
1943 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1944 char internal_label_prefix[16];
1945 int internal_label_prefix_len;
1946
1947 /* Fence to use after loop using movnt. */
1948 tree x86_mfence;
1949
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class;
   gcc just uses an SFmode or DFmode move instead of DImode to avoid
   reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
1957 enum x86_64_reg_class
1958 {
1959 X86_64_NO_CLASS,
1960 X86_64_INTEGER_CLASS,
1961 X86_64_INTEGERSI_CLASS,
1962 X86_64_SSE_CLASS,
1963 X86_64_SSESF_CLASS,
1964 X86_64_SSEDF_CLASS,
1965 X86_64_SSEUP_CLASS,
1966 X86_64_X87_CLASS,
1967 X86_64_X87UP_CLASS,
1968 X86_64_COMPLEX_X87_CLASS,
1969 X86_64_MEMORY_CLASS
1970 };
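/* Example classification (a sketch following the psABI rules together
   with the SSESF/SSEDF/INTEGERSI refinements noted above): for

       struct s { double d; int i; };

   the first eightbyte (d) is classified X86_64_SSEDF_CLASS and the
   second eightbyte (i) X86_64_INTEGERSI_CLASS, so the struct is passed
   in one SSE register and one integer register.  */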
1971
1972 #define MAX_CLASSES 4
1973
1974 /* Table of constants used by fldpi, fldln2, etc.... */
1975 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1976 static bool ext_80387_constants_init = 0;
1977
1978 \f
1979 static struct machine_function * ix86_init_machine_status (void);
1980 static rtx ix86_function_value (const_tree, const_tree, bool);
1981 static bool ix86_function_value_regno_p (const unsigned int);
1982 static rtx ix86_static_chain (const_tree, bool);
1983 static int ix86_function_regparm (const_tree, const_tree);
1984 static void ix86_compute_frame_layout (struct ix86_frame *);
1985 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1986 rtx, rtx, int);
1987 static void ix86_add_new_builtins (int);
1988 static rtx ix86_expand_vec_perm_builtin (tree);
1989 static tree ix86_canonical_va_list_type (tree);
1990 static void predict_jump (int);
1991 static unsigned int split_stack_prologue_scratch_regno (void);
1992 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
1993
1994 enum ix86_function_specific_strings
1995 {
1996 IX86_FUNCTION_SPECIFIC_ARCH,
1997 IX86_FUNCTION_SPECIFIC_TUNE,
1998 IX86_FUNCTION_SPECIFIC_FPMATH,
1999 IX86_FUNCTION_SPECIFIC_MAX
2000 };
2001
2002 static char *ix86_target_string (int, int, const char *, const char *,
2003 const char *, bool);
2004 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2005 static void ix86_function_specific_save (struct cl_target_option *);
2006 static void ix86_function_specific_restore (struct cl_target_option *);
2007 static void ix86_function_specific_print (FILE *, int,
2008 struct cl_target_option *);
2009 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2010 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2011 static bool ix86_can_inline_p (tree, tree);
2012 static void ix86_set_current_function (tree);
2013 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2014
2015 static enum calling_abi ix86_function_abi (const_tree);
2016
2017 \f
2018 #ifndef SUBTARGET32_DEFAULT_CPU
2019 #define SUBTARGET32_DEFAULT_CPU "i386"
2020 #endif
2021
2022 /* The svr4 ABI for the i386 says that records and unions are returned
2023 in memory. */
2024 #ifndef DEFAULT_PCC_STRUCT_RETURN
2025 #define DEFAULT_PCC_STRUCT_RETURN 1
2026 #endif
2027
2028 /* Whether -mtune= or -march= were specified */
2029 static int ix86_tune_defaulted;
2030 static int ix86_arch_specified;
2031
2032 /* A mask of ix86_isa_flags that includes bit X if X
2033 was set or cleared on the command line. */
2034 static int ix86_isa_flags_explicit;
2035
2036 /* Define a set of ISAs which are available when a given ISA is
2037 enabled. MMX and SSE ISAs are handled separately. */
2038
2039 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2040 #define OPTION_MASK_ISA_3DNOW_SET \
2041 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2042
2043 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2044 #define OPTION_MASK_ISA_SSE2_SET \
2045 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2046 #define OPTION_MASK_ISA_SSE3_SET \
2047 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2048 #define OPTION_MASK_ISA_SSSE3_SET \
2049 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2050 #define OPTION_MASK_ISA_SSE4_1_SET \
2051 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2052 #define OPTION_MASK_ISA_SSE4_2_SET \
2053 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2054 #define OPTION_MASK_ISA_AVX_SET \
2055 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2056 #define OPTION_MASK_ISA_FMA_SET \
2057 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2058
2059 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2060 as -msse4.2. */
2061 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2062
2063 #define OPTION_MASK_ISA_SSE4A_SET \
2064 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2065 #define OPTION_MASK_ISA_FMA4_SET \
2066 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2067 | OPTION_MASK_ISA_AVX_SET)
2068 #define OPTION_MASK_ISA_XOP_SET \
2069 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2070 #define OPTION_MASK_ISA_LWP_SET \
2071 OPTION_MASK_ISA_LWP
2072
/* AES and PCLMUL need SSE2 because they use XMM registers.  */
2074 #define OPTION_MASK_ISA_AES_SET \
2075 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2076 #define OPTION_MASK_ISA_PCLMUL_SET \
2077 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2078
2079 #define OPTION_MASK_ISA_ABM_SET \
2080 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2081
2082 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2083 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2084 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2085 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2086 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2087
2088 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2089 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2090 #define OPTION_MASK_ISA_F16C_SET \
2091 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
2092
2093 /* Define a set of ISAs which aren't available when a given ISA is
2094 disabled. MMX and SSE ISAs are handled separately. */
2095
2096 #define OPTION_MASK_ISA_MMX_UNSET \
2097 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2098 #define OPTION_MASK_ISA_3DNOW_UNSET \
2099 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2100 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2101
2102 #define OPTION_MASK_ISA_SSE_UNSET \
2103 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2104 #define OPTION_MASK_ISA_SSE2_UNSET \
2105 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2106 #define OPTION_MASK_ISA_SSE3_UNSET \
2107 (OPTION_MASK_ISA_SSE3 \
2108 | OPTION_MASK_ISA_SSSE3_UNSET \
2109 | OPTION_MASK_ISA_SSE4A_UNSET )
2110 #define OPTION_MASK_ISA_SSSE3_UNSET \
2111 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2112 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2113 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2114 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2115 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2116 #define OPTION_MASK_ISA_AVX_UNSET \
2117 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2118 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2119 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2120
/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
   as -mno-sse4.1.  */
2123 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2124
2125 #define OPTION_MASK_ISA_SSE4A_UNSET \
2126 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2127
2128 #define OPTION_MASK_ISA_FMA4_UNSET \
2129 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2130 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2131 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2132
2133 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2134 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2135 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2136 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2137 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2138 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2139 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2140 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2141
2142 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2143 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2144 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
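/* Worked example of the SET/UNSET pairs above: -msse4.2 ORs in
   OPTION_MASK_ISA_SSE4_2_SET, which transitively enables SSE4.1, SSSE3,
   SSE3, SSE2 and SSE as well; conversely -mno-sse2 clears
   OPTION_MASK_ISA_SSE2_UNSET, which also switches off SSE3, SSSE3,
   SSE4.x, SSE4A, AVX and the other ISAs that depend on SSE2.  */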
2145
2146 /* Vectorization library interface and handlers. */
2147 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2148
2149 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2150 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2151
2152 /* Processor target table, indexed by processor number */
2153 struct ptt
2154 {
2155 const struct processor_costs *cost; /* Processor costs */
2156 const int align_loop; /* Default alignments. */
2157 const int align_loop_max_skip;
2158 const int align_jump;
2159 const int align_jump_max_skip;
2160 const int align_func;
2161 };
2162
2163 static const struct ptt processor_target_table[PROCESSOR_max] =
2164 {
2165 {&i386_cost, 4, 3, 4, 3, 4},
2166 {&i486_cost, 16, 15, 16, 15, 16},
2167 {&pentium_cost, 16, 7, 16, 7, 16},
2168 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2169 {&geode_cost, 0, 0, 0, 0, 0},
2170 {&k6_cost, 32, 7, 32, 7, 32},
2171 {&athlon_cost, 16, 7, 16, 7, 16},
2172 {&pentium4_cost, 0, 0, 0, 0, 0},
2173 {&k8_cost, 16, 7, 16, 7, 16},
2174 {&nocona_cost, 0, 0, 0, 0, 0},
2175 {&core2_cost, 16, 10, 16, 10, 16},
2176 {&generic32_cost, 16, 7, 16, 7, 16},
2177 {&generic64_cost, 16, 10, 16, 10, 16},
2178 {&amdfam10_cost, 32, 24, 32, 7, 32},
2179 {&bdver1_cost, 32, 24, 32, 7, 32},
2180 {&atom_cost, 16, 7, 16, 7, 16}
2181 };
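/* A sketch of how this table is used later in ix86_option_override_internal:
   once ix86_tune is known, the per-processor values become the defaults
   for the generic alignment variables when the user did not set them,
   roughly

       if (align_loops == 0)
         align_loops = processor_target_table[ix86_tune].align_loop;

   and likewise for align_jumps and align_functions.  */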
2182
2183 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2184 {
2185 "generic",
2186 "i386",
2187 "i486",
2188 "pentium",
2189 "pentium-mmx",
2190 "pentiumpro",
2191 "pentium2",
2192 "pentium3",
2193 "pentium4",
2194 "pentium-m",
2195 "prescott",
2196 "nocona",
2197 "core2",
2198 "atom",
2199 "geode",
2200 "k6",
2201 "k6-2",
2202 "k6-3",
2203 "athlon",
2204 "athlon-4",
2205 "k8",
2206 "amdfam10",
2207 "bdver1"
2208 };
2209 \f
2210 /* Return true if a red-zone is in use. */
2211
2212 static inline bool
2213 ix86_using_red_zone (void)
2214 {
2215 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2216 }
2217
2218 /* Implement TARGET_HANDLE_OPTION. */
2219
2220 static bool
2221 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2222 {
2223 switch (code)
2224 {
2225 case OPT_mmmx:
2226 if (value)
2227 {
2228 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2229 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2230 }
2231 else
2232 {
2233 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2235 }
2236 return true;
2237
2238 case OPT_m3dnow:
2239 if (value)
2240 {
2241 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2242 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2243 }
2244 else
2245 {
2246 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2247 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2248 }
2249 return true;
2250
2251 case OPT_m3dnowa:
2252 return false;
2253
2254 case OPT_msse:
2255 if (value)
2256 {
2257 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2258 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2259 }
2260 else
2261 {
2262 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2263 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2264 }
2265 return true;
2266
2267 case OPT_msse2:
2268 if (value)
2269 {
2270 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2271 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2272 }
2273 else
2274 {
2275 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2276 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2277 }
2278 return true;
2279
2280 case OPT_msse3:
2281 if (value)
2282 {
2283 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2284 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2285 }
2286 else
2287 {
2288 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2289 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2290 }
2291 return true;
2292
2293 case OPT_mssse3:
2294 if (value)
2295 {
2296 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2297 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2298 }
2299 else
2300 {
2301 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2302 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2303 }
2304 return true;
2305
2306 case OPT_msse4_1:
2307 if (value)
2308 {
2309 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2310 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2311 }
2312 else
2313 {
2314 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2315 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2316 }
2317 return true;
2318
2319 case OPT_msse4_2:
2320 if (value)
2321 {
2322 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2323 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2324 }
2325 else
2326 {
2327 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2328 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2329 }
2330 return true;
2331
2332 case OPT_mavx:
2333 if (value)
2334 {
2335 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2336 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2337 }
2338 else
2339 {
2340 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2341 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2342 }
2343 return true;
2344
2345 case OPT_mfma:
2346 if (value)
2347 {
2348 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2349 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2350 }
2351 else
2352 {
2353 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2354 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2355 }
2356 return true;
2357
2358 case OPT_msse4:
2359 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2360 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2361 return true;
2362
2363 case OPT_mno_sse4:
2364 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2365 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2366 return true;
2367
2368 case OPT_msse4a:
2369 if (value)
2370 {
2371 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2372 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2373 }
2374 else
2375 {
2376 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2377 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2378 }
2379 return true;
2380
2381 case OPT_mfma4:
2382 if (value)
2383 {
2384 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2385 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2386 }
2387 else
2388 {
2389 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2390 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2391 }
2392 return true;
2393
2394 case OPT_mxop:
2395 if (value)
2396 {
2397 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2398 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2399 }
2400 else
2401 {
2402 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2403 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2404 }
2405 return true;
2406
2407 case OPT_mlwp:
2408 if (value)
2409 {
2410 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2411 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2412 }
2413 else
2414 {
2415 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2416 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2417 }
2418 return true;
2419
2420 case OPT_mabm:
2421 if (value)
2422 {
2423 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2424 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2425 }
2426 else
2427 {
2428 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2429 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2430 }
2431 return true;
2432
2433 case OPT_mpopcnt:
2434 if (value)
2435 {
2436 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2437 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2438 }
2439 else
2440 {
2441 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2442 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2443 }
2444 return true;
2445
2446 case OPT_msahf:
2447 if (value)
2448 {
2449 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2450 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2451 }
2452 else
2453 {
2454 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2455 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2456 }
2457 return true;
2458
2459 case OPT_mcx16:
2460 if (value)
2461 {
2462 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2463 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2464 }
2465 else
2466 {
2467 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2468 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2469 }
2470 return true;
2471
2472 case OPT_mmovbe:
2473 if (value)
2474 {
2475 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2476 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2477 }
2478 else
2479 {
2480 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2481 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2482 }
2483 return true;
2484
2485 case OPT_mcrc32:
2486 if (value)
2487 {
2488 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2489 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2490 }
2491 else
2492 {
2493 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2494 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2495 }
2496 return true;
2497
2498 case OPT_maes:
2499 if (value)
2500 {
2501 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2502 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2503 }
2504 else
2505 {
2506 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2507 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2508 }
2509 return true;
2510
2511 case OPT_mpclmul:
2512 if (value)
2513 {
2514 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2515 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2516 }
2517 else
2518 {
2519 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2520 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2521 }
2522 return true;
2523
2524 case OPT_mfsgsbase:
2525 if (value)
2526 {
2527 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2528 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2529 }
2530 else
2531 {
2532 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2533 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2534 }
2535 return true;
2536
2537 case OPT_mrdrnd:
2538 if (value)
2539 {
2540 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2541 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2542 }
2543 else
2544 {
2545 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2546 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2547 }
2548 return true;
2549
2550 case OPT_mf16c:
2551 if (value)
2552 {
2553 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2554 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2555 }
2556 else
2557 {
2558 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2559 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
2560 }
2561 return true;
2562
2563 default:
2564 return true;
2565 }
2566 }
2567 \f
2568 /* Return a string that documents the current -m options. The caller is
2569 responsible for freeing the string. */
2570
2571 static char *
2572 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2573 const char *fpmath, bool add_nl_p)
2574 {
2575 struct ix86_target_opts
2576 {
2577 const char *option; /* option string */
2578 int mask; /* isa mask options */
2579 };
2580
  /* This table is ordered so that options like -msse4.2, which imply
     preceding options, match those first.  */
2583 static struct ix86_target_opts isa_opts[] =
2584 {
2585 { "-m64", OPTION_MASK_ISA_64BIT },
2586 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2587 { "-mfma", OPTION_MASK_ISA_FMA },
2588 { "-mxop", OPTION_MASK_ISA_XOP },
2589 { "-mlwp", OPTION_MASK_ISA_LWP },
2590 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2591 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2592 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2593 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2594 { "-msse3", OPTION_MASK_ISA_SSE3 },
2595 { "-msse2", OPTION_MASK_ISA_SSE2 },
2596 { "-msse", OPTION_MASK_ISA_SSE },
2597 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2598 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2599 { "-mmmx", OPTION_MASK_ISA_MMX },
2600 { "-mabm", OPTION_MASK_ISA_ABM },
2601 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2602 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2603 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2604 { "-maes", OPTION_MASK_ISA_AES },
2605 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2606 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2607 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2608 { "-mf16c", OPTION_MASK_ISA_F16C },
2609 };
2610
2611 /* Flag options. */
2612 static struct ix86_target_opts flag_opts[] =
2613 {
2614 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2615 { "-m80387", MASK_80387 },
2616 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2617 { "-malign-double", MASK_ALIGN_DOUBLE },
2618 { "-mcld", MASK_CLD },
2619 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2620 { "-mieee-fp", MASK_IEEE_FP },
2621 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2622 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2623 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2624 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2625 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2626 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2627 { "-mno-red-zone", MASK_NO_RED_ZONE },
2628 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2629 { "-mrecip", MASK_RECIP },
2630 { "-mrtd", MASK_RTD },
2631 { "-msseregparm", MASK_SSEREGPARM },
2632 { "-mstack-arg-probe", MASK_STACK_PROBE },
2633 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2634 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2635 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2636 };
2637
2638 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2639
2640 char isa_other[40];
2641 char target_other[40];
2642 unsigned num = 0;
2643 unsigned i, j;
2644 char *ret;
2645 char *ptr;
2646 size_t len;
2647 size_t line_len;
2648 size_t sep_len;
2649
2650 memset (opts, '\0', sizeof (opts));
2651
2652 /* Add -march= option. */
2653 if (arch)
2654 {
2655 opts[num][0] = "-march=";
2656 opts[num++][1] = arch;
2657 }
2658
2659 /* Add -mtune= option. */
2660 if (tune)
2661 {
2662 opts[num][0] = "-mtune=";
2663 opts[num++][1] = tune;
2664 }
2665
2666 /* Pick out the options in isa options. */
2667 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2668 {
2669 if ((isa & isa_opts[i].mask) != 0)
2670 {
2671 opts[num++][0] = isa_opts[i].option;
2672 isa &= ~ isa_opts[i].mask;
2673 }
2674 }
2675
2676 if (isa && add_nl_p)
2677 {
2678 opts[num++][0] = isa_other;
2679 sprintf (isa_other, "(other isa: %#x)", isa);
2680 }
2681
2682 /* Add flag options. */
2683 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2684 {
2685 if ((flags & flag_opts[i].mask) != 0)
2686 {
2687 opts[num++][0] = flag_opts[i].option;
2688 flags &= ~ flag_opts[i].mask;
2689 }
2690 }
2691
2692 if (flags && add_nl_p)
2693 {
2694 opts[num++][0] = target_other;
2695 sprintf (target_other, "(other flags: %#x)", flags);
2696 }
2697
2698 /* Add -fpmath= option. */
2699 if (fpmath)
2700 {
2701 opts[num][0] = "-mfpmath=";
2702 opts[num++][1] = fpmath;
2703 }
2704
2705 /* Any options? */
2706 if (num == 0)
2707 return NULL;
2708
2709 gcc_assert (num < ARRAY_SIZE (opts));
2710
2711 /* Size the string. */
2712 len = 0;
2713 sep_len = (add_nl_p) ? 3 : 1;
2714 for (i = 0; i < num; i++)
2715 {
2716 len += sep_len;
2717 for (j = 0; j < 2; j++)
2718 if (opts[i][j])
2719 len += strlen (opts[i][j]);
2720 }
2721
2722 /* Build the string. */
2723 ret = ptr = (char *) xmalloc (len);
2724 line_len = 0;
2725
2726 for (i = 0; i < num; i++)
2727 {
2728 size_t len2[2];
2729
2730 for (j = 0; j < 2; j++)
2731 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2732
2733 if (i != 0)
2734 {
2735 *ptr++ = ' ';
2736 line_len++;
2737
2738 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2739 {
2740 *ptr++ = '\\';
2741 *ptr++ = '\n';
2742 line_len = 0;
2743 }
2744 }
2745
2746 for (j = 0; j < 2; j++)
2747 if (opts[i][j])
2748 {
2749 memcpy (ptr, opts[i][j], len2[j]);
2750 ptr += len2[j];
2751 line_len += len2[j];
2752 }
2753 }
2754
2755 *ptr = '\0';
2756 gcc_assert (ret + len >= ptr);
2757
2758 return ret;
2759 }
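/* Usage note: ix86_debug_options below calls this function with the live
   ix86_isa_flags and target_flags.  A hypothetical (illustrative) result
   could look like

       -march=core2 -mtune=generic -mssse3 -msse3 -msse2 -msse -mmmx
       -m80387 -mfpmath=sse

   with any leftover ISA or flag bits reported via the "(other isa: ...)"
   and "(other flags: ...)" notes when ADD_NL_P is true.  */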
2760
2761 /* Return TRUE if software prefetching is beneficial for the
2762 given CPU. */
2763
2764 static bool
2765 software_prefetching_beneficial_p (void)
2766 {
2767 switch (ix86_tune)
2768 {
2769 case PROCESSOR_GEODE:
2770 case PROCESSOR_K6:
2771 case PROCESSOR_ATHLON:
2772 case PROCESSOR_K8:
2773 case PROCESSOR_AMDFAM10:
2774 return true;
2775
2776 default:
2777 return false;
2778 }
2779 }
2780
/* Return true if profiling code should be emitted before the prologue,
   and false otherwise.
   Note: for x86 the "hotfix" case is sorried (rejected with sorry ()).  */
2784 static bool
2785 ix86_profile_before_prologue (void)
2786 {
2787 return flag_fentry != 0;
2788 }
2789
2790 /* Function that is callable from the debugger to print the current
2791 options. */
2792 void
2793 ix86_debug_options (void)
2794 {
2795 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2796 ix86_arch_string, ix86_tune_string,
2797 ix86_fpmath_string, true);
2798
2799 if (opts)
2800 {
2801 fprintf (stderr, "%s\n\n", opts);
2802 free (opts);
2803 }
2804 else
2805 fputs ("<no options>\n\n", stderr);
2806
2807 return;
2808 }
2809 \f
2810 /* Override various settings based on options. If MAIN_ARGS_P, the
2811 options are from the command line, otherwise they are from
2812 attributes. */
2813
2814 static void
2815 ix86_option_override_internal (bool main_args_p)
2816 {
2817 int i;
2818 unsigned int ix86_arch_mask, ix86_tune_mask;
2819 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2820 const char *prefix;
2821 const char *suffix;
2822 const char *sw;
2823
2824 /* Comes from final.c -- no real reason to change it. */
2825 #define MAX_CODE_ALIGN 16
2826
2827 enum pta_flags
2828 {
2829 PTA_SSE = 1 << 0,
2830 PTA_SSE2 = 1 << 1,
2831 PTA_SSE3 = 1 << 2,
2832 PTA_MMX = 1 << 3,
2833 PTA_PREFETCH_SSE = 1 << 4,
2834 PTA_3DNOW = 1 << 5,
2835 PTA_3DNOW_A = 1 << 6,
2836 PTA_64BIT = 1 << 7,
2837 PTA_SSSE3 = 1 << 8,
2838 PTA_CX16 = 1 << 9,
2839 PTA_POPCNT = 1 << 10,
2840 PTA_ABM = 1 << 11,
2841 PTA_SSE4A = 1 << 12,
2842 PTA_NO_SAHF = 1 << 13,
2843 PTA_SSE4_1 = 1 << 14,
2844 PTA_SSE4_2 = 1 << 15,
2845 PTA_AES = 1 << 16,
2846 PTA_PCLMUL = 1 << 17,
2847 PTA_AVX = 1 << 18,
2848 PTA_FMA = 1 << 19,
2849 PTA_MOVBE = 1 << 20,
2850 PTA_FMA4 = 1 << 21,
2851 PTA_XOP = 1 << 22,
2852 PTA_LWP = 1 << 23,
2853 PTA_FSGSBASE = 1 << 24,
2854 PTA_RDRND = 1 << 25,
2855 PTA_F16C = 1 << 26
2856 };
2857
2858 static struct pta
2859 {
2860 const char *const name; /* processor name or nickname. */
2861 const enum processor_type processor;
2862 const enum attr_cpu schedule;
2863 const unsigned /*enum pta_flags*/ flags;
2864 }
2865 const processor_alias_table[] =
2866 {
2867 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2868 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2869 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2870 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2871 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2872 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2873 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2874 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2875 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2876 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2877 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2878 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2879 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2880 PTA_MMX | PTA_SSE},
2881 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2882 PTA_MMX | PTA_SSE},
2883 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2884 PTA_MMX | PTA_SSE | PTA_SSE2},
2885 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2886 PTA_MMX |PTA_SSE | PTA_SSE2},
2887 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2888 PTA_MMX | PTA_SSE | PTA_SSE2},
2889 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2890 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2891 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2892 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2893 | PTA_CX16 | PTA_NO_SAHF},
2894 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2895 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2896 | PTA_SSSE3 | PTA_CX16},
2897 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2898 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2899 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2900 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2901 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2902 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2903 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2904 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2905 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2906 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2907 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2908 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2909 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2910 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2911 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2912 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2913 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2914 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2915 {"x86-64", PROCESSOR_K8, CPU_K8,
2916 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2917 {"k8", PROCESSOR_K8, CPU_K8,
2918 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2919 | PTA_SSE2 | PTA_NO_SAHF},
2920 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2921 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2922 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2923 {"opteron", PROCESSOR_K8, CPU_K8,
2924 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2925 | PTA_SSE2 | PTA_NO_SAHF},
2926 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2927 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2928 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2929 {"athlon64", PROCESSOR_K8, CPU_K8,
2930 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2931 | PTA_SSE2 | PTA_NO_SAHF},
2932 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2933 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2934 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2935 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2936 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2937 | PTA_SSE2 | PTA_NO_SAHF},
2938 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2939 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2940 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2941 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2942 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2943 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2944 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2945 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2946 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2947 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2948 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2949 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2950 0 /* flags are only used for -march switch. */ },
2951 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2952 PTA_64BIT /* flags are only used for -march switch. */ },
2953 };
2954
2955 int const pta_size = ARRAY_SIZE (processor_alias_table);
2956
2957 /* Set up prefix/suffix so the error messages refer to either the command
2958 line argument, or the attribute(target). */
2959 if (main_args_p)
2960 {
2961 prefix = "-m";
2962 suffix = "";
2963 sw = "switch";
2964 }
2965 else
2966 {
2967 prefix = "option(\"";
2968 suffix = "\")";
2969 sw = "attribute";
2970 }
2971
2972 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2973 SUBTARGET_OVERRIDE_OPTIONS;
2974 #endif
2975
2976 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2977 SUBSUBTARGET_OVERRIDE_OPTIONS;
2978 #endif
2979
2980 /* -fPIC is the default for x86_64. */
2981 if (TARGET_MACHO && TARGET_64BIT)
2982 flag_pic = 2;
2983
2984 /* Need to check -mtune=generic first. */
2985 if (ix86_tune_string)
2986 {
2987 if (!strcmp (ix86_tune_string, "generic")
2988 || !strcmp (ix86_tune_string, "i686")
2989 /* As special support for cross compilers we read -mtune=native
2990 as -mtune=generic. With native compilers we won't see the
2991 -mtune=native, as it was changed by the driver. */
2992 || !strcmp (ix86_tune_string, "native"))
2993 {
2994 if (TARGET_64BIT)
2995 ix86_tune_string = "generic64";
2996 else
2997 ix86_tune_string = "generic32";
2998 }
2999 /* If this call is for setting the option attribute, allow the
3000 generic32/generic64 that was previously set. */
3001 else if (!main_args_p
3002 && (!strcmp (ix86_tune_string, "generic32")
3003 || !strcmp (ix86_tune_string, "generic64")))
3004 ;
3005 else if (!strncmp (ix86_tune_string, "generic", 7))
3006 error ("bad value (%s) for %stune=%s %s",
3007 ix86_tune_string, prefix, suffix, sw);
3008 else if (!strcmp (ix86_tune_string, "x86-64"))
3009 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3010 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3011 prefix, suffix, prefix, suffix, prefix, suffix);
3012 }
3013 else
3014 {
3015 if (ix86_arch_string)
3016 ix86_tune_string = ix86_arch_string;
3017 if (!ix86_tune_string)
3018 {
3019 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3020 ix86_tune_defaulted = 1;
3021 }
3022
3023 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3024 need to use a sensible tune option. */
3025 if (!strcmp (ix86_tune_string, "generic")
3026 || !strcmp (ix86_tune_string, "x86-64")
3027 || !strcmp (ix86_tune_string, "i686"))
3028 {
3029 if (TARGET_64BIT)
3030 ix86_tune_string = "generic64";
3031 else
3032 ix86_tune_string = "generic32";
3033 }
3034 }
3035
3036 if (ix86_stringop_string)
3037 {
3038 if (!strcmp (ix86_stringop_string, "rep_byte"))
3039 stringop_alg = rep_prefix_1_byte;
3040 else if (!strcmp (ix86_stringop_string, "libcall"))
3041 stringop_alg = libcall;
3042 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3043 stringop_alg = rep_prefix_4_byte;
3044 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3045 && TARGET_64BIT)
3046 /* rep; movq isn't available in 32-bit code. */
3047 stringop_alg = rep_prefix_8_byte;
3048 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3049 stringop_alg = loop_1_byte;
3050 else if (!strcmp (ix86_stringop_string, "loop"))
3051 stringop_alg = loop;
3052 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3053 stringop_alg = unrolled_loop;
3054 else
3055 error ("bad value (%s) for %sstringop-strategy=%s %s",
3056 ix86_stringop_string, prefix, suffix, sw);
3057 }
3058
3059 if (!ix86_arch_string)
3060 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3061 else
3062 ix86_arch_specified = 1;
3063
3064 /* Validate -mabi= value. */
3065 if (ix86_abi_string)
3066 {
3067 if (strcmp (ix86_abi_string, "sysv") == 0)
3068 ix86_abi = SYSV_ABI;
3069 else if (strcmp (ix86_abi_string, "ms") == 0)
3070 ix86_abi = MS_ABI;
3071 else
3072 error ("unknown ABI (%s) for %sabi=%s %s",
3073 ix86_abi_string, prefix, suffix, sw);
3074 }
3075 else
3076 ix86_abi = DEFAULT_ABI;
3077
3078 if (ix86_cmodel_string != 0)
3079 {
3080 if (!strcmp (ix86_cmodel_string, "small"))
3081 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3082 else if (!strcmp (ix86_cmodel_string, "medium"))
3083 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3084 else if (!strcmp (ix86_cmodel_string, "large"))
3085 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3086 else if (flag_pic)
3087 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3088 else if (!strcmp (ix86_cmodel_string, "32"))
3089 ix86_cmodel = CM_32;
3090 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3091 ix86_cmodel = CM_KERNEL;
3092 else
3093 error ("bad value (%s) for %scmodel=%s %s",
3094 ix86_cmodel_string, prefix, suffix, sw);
3095 }
3096 else
3097 {
3098 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3099 use of rip-relative addressing. This eliminates fixups that
3100 would otherwise be needed if this object is to be placed in a
3101 DLL, and is essentially just as efficient as direct addressing. */
3102 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3103 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3104 else if (TARGET_64BIT)
3105 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3106 else
3107 ix86_cmodel = CM_32;
3108 }
3109 if (ix86_asm_string != 0)
3110 {
3111 if (! TARGET_MACHO
3112 && !strcmp (ix86_asm_string, "intel"))
3113 ix86_asm_dialect = ASM_INTEL;
3114 else if (!strcmp (ix86_asm_string, "att"))
3115 ix86_asm_dialect = ASM_ATT;
3116 else
3117 error ("bad value (%s) for %sasm=%s %s",
3118 ix86_asm_string, prefix, suffix, sw);
3119 }
3120 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3121 error ("code model %qs not supported in the %s bit mode",
3122 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3123 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3124 sorry ("%i-bit mode not compiled in",
3125 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3126
3127 for (i = 0; i < pta_size; i++)
3128 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3129 {
3130 ix86_schedule = processor_alias_table[i].schedule;
3131 ix86_arch = processor_alias_table[i].processor;
3132 /* Default cpu tuning to the architecture. */
3133 ix86_tune = ix86_arch;
3134
3135 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3136 error ("CPU you selected does not support x86-64 "
3137 "instruction set");
3138
3139 if (processor_alias_table[i].flags & PTA_MMX
3140 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3141 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3142 if (processor_alias_table[i].flags & PTA_3DNOW
3143 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3144 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3145 if (processor_alias_table[i].flags & PTA_3DNOW_A
3146 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3147 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3148 if (processor_alias_table[i].flags & PTA_SSE
3149 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3150 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3151 if (processor_alias_table[i].flags & PTA_SSE2
3152 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3153 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3154 if (processor_alias_table[i].flags & PTA_SSE3
3155 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3156 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3157 if (processor_alias_table[i].flags & PTA_SSSE3
3158 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3159 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3160 if (processor_alias_table[i].flags & PTA_SSE4_1
3161 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3162 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3163 if (processor_alias_table[i].flags & PTA_SSE4_2
3164 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3165 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3166 if (processor_alias_table[i].flags & PTA_AVX
3167 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3168 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3169 if (processor_alias_table[i].flags & PTA_FMA
3170 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3171 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3172 if (processor_alias_table[i].flags & PTA_SSE4A
3173 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3174 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3175 if (processor_alias_table[i].flags & PTA_FMA4
3176 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3177 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3178 if (processor_alias_table[i].flags & PTA_XOP
3179 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3180 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3181 if (processor_alias_table[i].flags & PTA_LWP
3182 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3183 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3184 if (processor_alias_table[i].flags & PTA_ABM
3185 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3186 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3187 if (processor_alias_table[i].flags & PTA_CX16
3188 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3189 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3190 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3191 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3192 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3193 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3194 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3195 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3196 if (processor_alias_table[i].flags & PTA_MOVBE
3197 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3198 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3199 if (processor_alias_table[i].flags & PTA_AES
3200 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3201 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3202 if (processor_alias_table[i].flags & PTA_PCLMUL
3203 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3204 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3205 if (processor_alias_table[i].flags & PTA_FSGSBASE
3206 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3207 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3208 if (processor_alias_table[i].flags & PTA_RDRND
3209 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3210 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3211 if (processor_alias_table[i].flags & PTA_F16C
3212 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3213 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3214 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3215 x86_prefetch_sse = true;
3216
3217 break;
3218 }
3219
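  /* "generic" is only valid for -mtune=; reject it, any other name starting
     with "generic", and any CPU name not found in the alias table.  */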
3220 if (!strcmp (ix86_arch_string, "generic"))
3221 error ("generic CPU can be used only for %stune=%s %s",
3222 prefix, suffix, sw);
3223 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3224 error ("bad value (%s) for %sarch=%s %s",
3225 ix86_arch_string, prefix, suffix, sw);
3226
3227 ix86_arch_mask = 1u << ix86_arch;
3228 for (i = 0; i < X86_ARCH_LAST; ++i)
3229 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3230
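  /* Resolve -mtune= the same way.  If the chosen CPU cannot execute x86-64
     code, either fall back to tuning for "x86-64" (when the tuning was only
     defaulted) or report an error.  */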
3231 for (i = 0; i < pta_size; i++)
3232 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3233 {
3234 ix86_schedule = processor_alias_table[i].schedule;
3235 ix86_tune = processor_alias_table[i].processor;
3236 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3237 {
3238 if (ix86_tune_defaulted)
3239 {
3240 ix86_tune_string = "x86-64";
3241 for (i = 0; i < pta_size; i++)
3242 if (! strcmp (ix86_tune_string,
3243 processor_alias_table[i].name))
3244 break;
3245 ix86_schedule = processor_alias_table[i].schedule;
3246 ix86_tune = processor_alias_table[i].processor;
3247 }
3248 else
3249 error ("CPU you selected does not support x86-64 "
3250 "instruction set");
3251 }
3252 /* Intel CPUs have always interpreted SSE prefetch instructions as
3253 NOPs; so, we can enable SSE prefetch instructions even when
3254 -mtune (rather than -march) points us to a processor that has them.
3255 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3256 higher processors. */
3257 if (TARGET_CMOVE
3258 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3259 x86_prefetch_sse = true;
3260 break;
3261 }
3262
3263 if (ix86_tune_specified && i == pta_size)
3264 error ("bad value (%s) for %stune=%s %s",
3265 ix86_tune_string, prefix, suffix, sw);
3266
3267 ix86_tune_mask = 1u << ix86_tune;
3268 for (i = 0; i < X86_TUNE_LAST; ++i)
3269 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3270
3271 #ifndef USE_IX86_FRAME_POINTER
3272 #define USE_IX86_FRAME_POINTER 0
3273 #endif
3274
3275 /* Set the default values for switches whose default depends on TARGET_64BIT
3276 in case they weren't overwritten by command line options. */
3277 if (TARGET_64BIT)
3278 {
3279 if (flag_zee == 2)
3280 flag_zee = 1;
3281 if (flag_omit_frame_pointer == 2)
3282 flag_omit_frame_pointer = 1;
3283 if (flag_asynchronous_unwind_tables == 2)
3284 flag_asynchronous_unwind_tables = 1;
3285 if (flag_pcc_struct_return == 2)
3286 flag_pcc_struct_return = 0;
3287 }
3288 else
3289 {
3290 if (flag_zee == 2)
3291 flag_zee = 0;
3292 if (flag_omit_frame_pointer == 2)
3293 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3294 if (flag_asynchronous_unwind_tables == 2)
3295 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3296 if (flag_pcc_struct_return == 2)
3297 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3298 }
3299
3300 if (optimize_size)
3301 ix86_cost = &ix86_size_cost;
3302 else
3303 ix86_cost = processor_target_table[ix86_tune].cost;
3304
3305 /* Arrange to set up i386_stack_locals for all functions. */
3306 init_machine_status = ix86_init_machine_status;
3307
3308 /* Validate -mregparm= value. */
3309 if (ix86_regparm_string)
3310 {
3311 if (TARGET_64BIT)
3312 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3313 i = atoi (ix86_regparm_string);
3314 if (i < 0 || i > REGPARM_MAX)
3315 error ("%sregparm=%d%s is not between 0 and %d",
3316 prefix, i, suffix, REGPARM_MAX);
3317 else
3318 ix86_regparm = i;
3319 }
3320 if (TARGET_64BIT)
3321 ix86_regparm = REGPARM_MAX;
3322
3323 /* If the user has provided any of the -malign-* options,
3324 warn and use that value only if -falign-* is not set.
3325 Remove this code in GCC 3.2 or later. */
3326 if (ix86_align_loops_string)
3327 {
3328 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3329 prefix, suffix, suffix);
3330 if (align_loops == 0)
3331 {
3332 i = atoi (ix86_align_loops_string);
3333 if (i < 0 || i > MAX_CODE_ALIGN)
3334 error ("%salign-loops=%d%s is not between 0 and %d",
3335 prefix, i, suffix, MAX_CODE_ALIGN);
3336 else
3337 align_loops = 1 << i;
3338 }
3339 }
3340
3341 if (ix86_align_jumps_string)
3342 {
3343 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3344 prefix, suffix, suffix);
3345 if (align_jumps == 0)
3346 {
3347 i = atoi (ix86_align_jumps_string);
3348 if (i < 0 || i > MAX_CODE_ALIGN)
3349 error ("%salign-loops=%d%s is not between 0 and %d",
3350 prefix, i, suffix, MAX_CODE_ALIGN);
3351 else
3352 align_jumps = 1 << i;
3353 }
3354 }
3355
3356 if (ix86_align_funcs_string)
3357 {
3358 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3359 prefix, suffix, suffix);
3360 if (align_functions == 0)
3361 {
3362 i = atoi (ix86_align_funcs_string);
3363 if (i < 0 || i > MAX_CODE_ALIGN)
3364 error ("%salign-loops=%d%s is not between 0 and %d",
3365 prefix, i, suffix, MAX_CODE_ALIGN);
3366 else
3367 align_functions = 1 << i;
3368 }
3369 }
3370
3371 /* Default align_* from the processor table. */
3372 if (align_loops == 0)
3373 {
3374 align_loops = processor_target_table[ix86_tune].align_loop;
3375 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3376 }
3377 if (align_jumps == 0)
3378 {
3379 align_jumps = processor_target_table[ix86_tune].align_jump;
3380 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3381 }
3382 if (align_functions == 0)
3383 {
3384 align_functions = processor_target_table[ix86_tune].align_func;
3385 }
3386
3387 /* Validate -mbranch-cost= value, or provide default. */
3388 ix86_branch_cost = ix86_cost->branch_cost;
3389 if (ix86_branch_cost_string)
3390 {
3391 i = atoi (ix86_branch_cost_string);
3392 if (i < 0 || i > 5)
3393 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3394 else
3395 ix86_branch_cost = i;
3396 }
3397 if (ix86_section_threshold_string)
3398 {
3399 i = atoi (ix86_section_threshold_string);
3400 if (i < 0)
3401 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3402 else
3403 ix86_section_threshold = i;
3404 }
3405
3406 if (ix86_tls_dialect_string)
3407 {
3408 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3409 ix86_tls_dialect = TLS_DIALECT_GNU;
3410 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3411 ix86_tls_dialect = TLS_DIALECT_GNU2;
3412 else
3413 error ("bad value (%s) for %stls-dialect=%s %s",
3414 ix86_tls_dialect_string, prefix, suffix, sw);
3415 }
3416
3417 if (ix87_precision_string)
3418 {
3419 i = atoi (ix87_precision_string);
3420 if (i != 32 && i != 64 && i != 80)
3421 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3422 }
3423
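  /* Merge in the subtarget's default target flags (and, when no -march= was
     given, its default ISA flags), never overriding anything the user set
     explicitly.  */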
3424 if (TARGET_64BIT)
3425 {
3426 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3427
3428 /* Enable by default the SSE and MMX builtins. Do allow the user to
3429 explicitly disable any of these. In particular, disabling SSE and
3430 MMX for kernel code is extremely useful. */
3431 if (!ix86_arch_specified)
3432 ix86_isa_flags
3433 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3434 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3435
3436 if (TARGET_RTD)
3437 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3438 }
3439 else
3440 {
3441 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3442
3443 if (!ix86_arch_specified)
3444 ix86_isa_flags
3445 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3446
3447 /* The i386 ABI does not specify a red zone.  It still makes sense to use
3448 it when the programmer takes care to keep the stack from being destroyed. */
3449 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3450 target_flags |= MASK_NO_RED_ZONE;
3451 }
3452
3453 /* Keep nonleaf frame pointers. */
3454 if (flag_omit_frame_pointer)
3455 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3456 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3457 flag_omit_frame_pointer = 1;
3458
3459 /* With -ffinite-math-only (implied by -ffast-math) we don't care about
3460 comparison order wrt NaNs.  This lets us use a shorter comparison sequence. */
3461 if (flag_finite_math_only)
3462 target_flags &= ~MASK_IEEE_FP;
3463
3464 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3465 since the insns won't need emulation. */
3466 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3467 target_flags &= ~MASK_NO_FANCY_MATH_387;
3468
3469 /* Likewise, if the target doesn't have a 387, or we've specified
3470 software floating point, don't use 387 inline intrinsics. */
3471 if (!TARGET_80387)
3472 target_flags |= MASK_NO_FANCY_MATH_387;
3473
3474 /* Turn on MMX builtins for -msse. */
3475 if (TARGET_SSE)
3476 {
3477 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3478 x86_prefetch_sse = true;
3479 }
3480
3481 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3482 if (TARGET_SSE4_2 || TARGET_ABM)
3483 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3484
3485 /* Validate -mpreferred-stack-boundary= value or default it to
3486 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3487 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3488 if (ix86_preferred_stack_boundary_string)
3489 {
3490 i = atoi (ix86_preferred_stack_boundary_string);
3491 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3492 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3493 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3494 else
3495 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3496 }
3497
3498 /* Set the default value for -mstackrealign. */
3499 if (ix86_force_align_arg_pointer == -1)
3500 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3501
3502 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3503
3504 /* Validate -mincoming-stack-boundary= value or default it to
3505 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3506 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3507 if (ix86_incoming_stack_boundary_string)
3508 {
3509 i = atoi (ix86_incoming_stack_boundary_string);
3510 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3511 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3512 i, TARGET_64BIT ? 4 : 2);
3513 else
3514 {
3515 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3516 ix86_incoming_stack_boundary
3517 = ix86_user_incoming_stack_boundary;
3518 }
3519 }
3520
3521 /* Accept -msseregparm only if at least SSE support is enabled. */
3522 if (TARGET_SSEREGPARM
3523 && ! TARGET_SSE)
3524 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3525
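  /* Parse -mfpmath=: "387", "sse", or any spelling that requests both units;
     fall back to the unit that is actually available when the requested one
     is disabled.  */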
3526 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3527 if (ix86_fpmath_string != 0)
3528 {
3529 if (! strcmp (ix86_fpmath_string, "387"))
3530 ix86_fpmath = FPMATH_387;
3531 else if (! strcmp (ix86_fpmath_string, "sse"))
3532 {
3533 if (!TARGET_SSE)
3534 {
3535 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3536 ix86_fpmath = FPMATH_387;
3537 }
3538 else
3539 ix86_fpmath = FPMATH_SSE;
3540 }
3541 else if (! strcmp (ix86_fpmath_string, "387,sse")
3542 || ! strcmp (ix86_fpmath_string, "387+sse")
3543 || ! strcmp (ix86_fpmath_string, "sse,387")
3544 || ! strcmp (ix86_fpmath_string, "sse+387")
3545 || ! strcmp (ix86_fpmath_string, "both"))
3546 {
3547 if (!TARGET_SSE)
3548 {
3549 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3550 ix86_fpmath = FPMATH_387;
3551 }
3552 else if (!TARGET_80387)
3553 {
3554 warning (0, "387 instruction set disabled, using SSE arithmetics");
3555 ix86_fpmath = FPMATH_SSE;
3556 }
3557 else
3558 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3559 }
3560 else
3561 error ("bad value (%s) for %sfpmath=%s %s",
3562 ix86_fpmath_string, prefix, suffix, sw);
3563 }
3564
3565 /* If the i387 is disabled, then do not return values in it. */
3566 if (!TARGET_80387)
3567 target_flags &= ~MASK_FLOAT_RETURNS;
3568
3569 /* Use an external vector math library when vectorizing intrinsics. */
3570 if (ix86_veclibabi_string)
3571 {
3572 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3573 ix86_veclib_handler = ix86_veclibabi_svml;
3574 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3575 ix86_veclib_handler = ix86_veclibabi_acml;
3576 else
3577 error ("unknown vectorization library ABI type (%s) for "
3578 "%sveclibabi=%s %s", ix86_veclibabi_string,
3579 prefix, suffix, sw);
3580 }
3581
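  /* Enable -maccumulate-outgoing-args by default when the tuning prefers it,
     or whenever the target does not insist on a frame pointer by default,
     unless the user decided explicitly or we are optimizing for size.  */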
3582 if ((!USE_IX86_FRAME_POINTER
3583 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3584 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3585 && !optimize_size)
3586 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3587
3588 /* ??? Unwind info is not correct around the CFG unless either a frame
3589 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3590 unwind info generation to be aware of the CFG and propagating states
3591 around edges. */
3592 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3593 || flag_exceptions || flag_non_call_exceptions)
3594 && flag_omit_frame_pointer
3595 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3596 {
3597 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3598 warning (0, "unwind tables currently require either a frame pointer "
3599 "or %saccumulate-outgoing-args%s for correctness",
3600 prefix, suffix);
3601 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3602 }
3603
3604 /* If stack probes are required, the space used for large function
3605 arguments on the stack must also be probed, so enable
3606 -maccumulate-outgoing-args so this happens in the prologue. */
3607 if (TARGET_STACK_PROBE
3608 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3609 {
3610 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3611 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3612 "for correctness", prefix, suffix);
3613 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3614 }
3615
3616 /* For sane SSE instruction set generation we need the fcomi instruction.
3617 It is safe to enable all CMOVE instructions. */
3618 if (TARGET_SSE)
3619 TARGET_CMOVE = 1;
3620
3621 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3622 {
3623 char *p;
3624 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3625 p = strchr (internal_label_prefix, 'X');
3626 internal_label_prefix_len = p - internal_label_prefix;
3627 *p = '\0';
3628 }
3629
3630 /* When no scheduling description is available, disable the scheduler
3631 passes so they won't slow down compilation or make x87 code slower. */
3632 if (!TARGET_SCHEDULE)
3633 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3634
3635 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3636 set_param_value ("simultaneous-prefetches",
3637 ix86_cost->simultaneous_prefetches);
3638 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3639 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3640 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3641 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3642 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3643 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3644
3645 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3646 if (flag_prefetch_loop_arrays < 0
3647 && HAVE_prefetch
3648 && optimize >= 3
3649 && software_prefetching_beneficial_p ())
3650 flag_prefetch_loop_arrays = 1;
3651
3652 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3653 can be optimized to ap = __builtin_next_arg (0). */
3654 if (!TARGET_64BIT && !flag_split_stack)
3655 targetm.expand_builtin_va_start = NULL;
3656
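  /* Pick the DImode or SImode variants of the common expanders once, so the
     rest of the back end can emit pointer-sized operations without checking
     TARGET_64BIT at every use.  */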
3657 if (TARGET_64BIT)
3658 {
3659 ix86_gen_leave = gen_leave_rex64;
3660 ix86_gen_add3 = gen_adddi3;
3661 ix86_gen_sub3 = gen_subdi3;
3662 ix86_gen_sub3_carry = gen_subdi3_carry;
3663 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3664 ix86_gen_monitor = gen_sse3_monitor64;
3665 ix86_gen_andsp = gen_anddi3;
3666 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3667 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3668 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3669 }
3670 else
3671 {
3672 ix86_gen_leave = gen_leave;
3673 ix86_gen_add3 = gen_addsi3;
3674 ix86_gen_sub3 = gen_subsi3;
3675 ix86_gen_sub3_carry = gen_subsi3_carry;
3676 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3677 ix86_gen_monitor = gen_sse3_monitor;
3678 ix86_gen_andsp = gen_andsi3;
3679 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3680 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3681 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3682 }
3683
3684 #ifdef USE_IX86_CLD
3685 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3686 if (!TARGET_64BIT)
3687 target_flags |= MASK_CLD & ~target_flags_explicit;
3688 #endif
3689
3690 if (!TARGET_64BIT && flag_pic)
3691 {
3692 if (flag_fentry > 0)
3693 sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
3694 flag_fentry = 0;
3695 }
3696 if (flag_fentry < 0)
3697 {
3698 #if defined(PROFILE_BEFORE_PROLOGUE)
3699 flag_fentry = 1;
3700 #else
3701 flag_fentry = 0;
3702 #endif
3703 }
3704
3705 /* Save the initial options in case the user uses function-specific options. */
3706 if (main_args_p)
3707 target_option_default_node = target_option_current_node
3708 = build_target_option_node ();
3709 }
3710
3711 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3712
3713 static void
3714 ix86_option_override (void)
3715 {
3716 ix86_option_override_internal (true);
3717 }
3718
3719 /* Update register usage after having seen the compiler flags. */
3720
3721 void
3722 ix86_conditional_register_usage (void)
3723 {
3724 int i;
3725 unsigned int j;
3726
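  /* Entries of 2 and 3 in the register tables mean "fixed (or call-used)
     only in 32-bit mode" and "only in 64-bit mode" respectively; collapse
     them to plain 0/1 now that TARGET_64BIT is known.  */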
3727 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3728 {
3729 if (fixed_regs[i] > 1)
3730 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3731 if (call_used_regs[i] > 1)
3732 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3733 }
3734
3735 /* The PIC register, if it exists, is fixed. */
3736 j = PIC_OFFSET_TABLE_REGNUM;
3737 if (j != INVALID_REGNUM)
3738 fixed_regs[j] = call_used_regs[j] = 1;
3739
3740 /* The MS_ABI changes the set of call-used registers. */
3741 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3742 {
3743 call_used_regs[SI_REG] = 0;
3744 call_used_regs[DI_REG] = 0;
3745 call_used_regs[XMM6_REG] = 0;
3746 call_used_regs[XMM7_REG] = 0;
3747 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3748 call_used_regs[i] = 0;
3749 }
3750
3751 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3752 other call-clobbered regs for 64-bit. */
3753 if (TARGET_64BIT)
3754 {
3755 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3756
3757 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3758 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3759 && call_used_regs[i])
3760 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3761 }
3762
3763 /* If MMX is disabled, squash the registers. */
3764 if (! TARGET_MMX)
3765 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3766 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3767 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3768
3769 /* If SSE is disabled, squash the registers. */
3770 if (! TARGET_SSE)
3771 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3772 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3773 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3774
3775 /* If the FPU is disabled, squash the registers. */
3776 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3777 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3778 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3779 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3780
3781 /* If 32-bit, squash the 64-bit registers. */
3782 if (! TARGET_64BIT)
3783 {
3784 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3785 reg_names[i] = "";
3786 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3787 reg_names[i] = "";
3788 }
3789 }
3790
3791 \f
3792 /* Save the current options */
3793
3794 static void
3795 ix86_function_specific_save (struct cl_target_option *ptr)
3796 {
3797 ptr->arch = ix86_arch;
3798 ptr->schedule = ix86_schedule;
3799 ptr->tune = ix86_tune;
3800 ptr->fpmath = ix86_fpmath;
3801 ptr->branch_cost = ix86_branch_cost;
3802 ptr->tune_defaulted = ix86_tune_defaulted;
3803 ptr->arch_specified = ix86_arch_specified;
3804 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3805 ptr->ix86_target_flags_explicit = target_flags_explicit;
3806
3807 /* The fields are char but the variables are not; make sure the
3808 values fit in the fields. */
3809 gcc_assert (ptr->arch == ix86_arch);
3810 gcc_assert (ptr->schedule == ix86_schedule);
3811 gcc_assert (ptr->tune == ix86_tune);
3812 gcc_assert (ptr->fpmath == ix86_fpmath);
3813 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3814 }
3815
3816 /* Restore the current options */
3817
3818 static void
3819 ix86_function_specific_restore (struct cl_target_option *ptr)
3820 {
3821 enum processor_type old_tune = ix86_tune;
3822 enum processor_type old_arch = ix86_arch;
3823 unsigned int ix86_arch_mask, ix86_tune_mask;
3824 int i;
3825
3826 ix86_arch = (enum processor_type) ptr->arch;
3827 ix86_schedule = (enum attr_cpu) ptr->schedule;
3828 ix86_tune = (enum processor_type) ptr->tune;
3829 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3830 ix86_branch_cost = ptr->branch_cost;
3831 ix86_tune_defaulted = ptr->tune_defaulted;
3832 ix86_arch_specified = ptr->arch_specified;
3833 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3834 target_flags_explicit = ptr->ix86_target_flags_explicit;
3835
3836 /* Recreate the arch feature tests if the arch changed */
3837 if (old_arch != ix86_arch)
3838 {
3839 ix86_arch_mask = 1u << ix86_arch;
3840 for (i = 0; i < X86_ARCH_LAST; ++i)
3841 ix86_arch_features[i]
3842 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3843 }
3844
3845 /* Recreate the tune optimization tests */
3846 if (old_tune != ix86_tune)
3847 {
3848 ix86_tune_mask = 1u << ix86_tune;
3849 for (i = 0; i < X86_TUNE_LAST; ++i)
3850 ix86_tune_features[i]
3851 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3852 }
3853 }
3854
3855 /* Print the current options */
3856
3857 static void
3858 ix86_function_specific_print (FILE *file, int indent,
3859 struct cl_target_option *ptr)
3860 {
3861 char *target_string
3862 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
3863 NULL, NULL, NULL, false);
3864
3865 fprintf (file, "%*sarch = %d (%s)\n",
3866 indent, "",
3867 ptr->arch,
3868 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3869 ? cpu_names[ptr->arch]
3870 : "<unknown>"));
3871
3872 fprintf (file, "%*stune = %d (%s)\n",
3873 indent, "",
3874 ptr->tune,
3875 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3876 ? cpu_names[ptr->tune]
3877 : "<unknown>"));
3878
3879 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3880 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3881 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3882 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3883
3884 if (target_string)
3885 {
3886 fprintf (file, "%*s%s\n", indent, "", target_string);
3887 free (target_string);
3888 }
3889 }
3890
3891 \f
3892 /* Inner function to process the attribute((target(...))): take an argument
3893 and set the current options from it.  If we are given a list, recurse
3894 over its elements. */
3895
3896 static bool
3897 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3898 {
3899 char *next_optstr;
3900 bool ret = true;
3901
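  /* Table-driven parsing of attribute((target("..."))): each entry below maps
     an option name to its kind (an ISA switch handled via ix86_handle_option,
     a boolean target_flags mask, or a string-valued option).  */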
3902 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3903 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3904 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3905 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3906
3907 enum ix86_opt_type
3908 {
3909 ix86_opt_unknown,
3910 ix86_opt_yes,
3911 ix86_opt_no,
3912 ix86_opt_str,
3913 ix86_opt_isa
3914 };
3915
3916 static const struct
3917 {
3918 const char *string;
3919 size_t len;
3920 enum ix86_opt_type type;
3921 int opt;
3922 int mask;
3923 } attrs[] = {
3924 /* isa options */
3925 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3926 IX86_ATTR_ISA ("abm", OPT_mabm),
3927 IX86_ATTR_ISA ("aes", OPT_maes),
3928 IX86_ATTR_ISA ("avx", OPT_mavx),
3929 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3930 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3931 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3932 IX86_ATTR_ISA ("sse", OPT_msse),
3933 IX86_ATTR_ISA ("sse2", OPT_msse2),
3934 IX86_ATTR_ISA ("sse3", OPT_msse3),
3935 IX86_ATTR_ISA ("sse4", OPT_msse4),
3936 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3937 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3938 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3939 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3940 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3941 IX86_ATTR_ISA ("xop", OPT_mxop),
3942 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3943 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3944 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3945 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3946
3947 /* string options */
3948 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3949 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3950 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3951
3952 /* flag options */
3953 IX86_ATTR_YES ("cld",
3954 OPT_mcld,
3955 MASK_CLD),
3956
3957 IX86_ATTR_NO ("fancy-math-387",
3958 OPT_mfancy_math_387,
3959 MASK_NO_FANCY_MATH_387),
3960
3961 IX86_ATTR_YES ("ieee-fp",
3962 OPT_mieee_fp,
3963 MASK_IEEE_FP),
3964
3965 IX86_ATTR_YES ("inline-all-stringops",
3966 OPT_minline_all_stringops,
3967 MASK_INLINE_ALL_STRINGOPS),
3968
3969 IX86_ATTR_YES ("inline-stringops-dynamically",
3970 OPT_minline_stringops_dynamically,
3971 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3972
3973 IX86_ATTR_NO ("align-stringops",
3974 OPT_mno_align_stringops,
3975 MASK_NO_ALIGN_STRINGOPS),
3976
3977 IX86_ATTR_YES ("recip",
3978 OPT_mrecip,
3979 MASK_RECIP),
3980
3981 };
3982
3983 /* If this is a list, recurse to get the options. */
3984 if (TREE_CODE (args) == TREE_LIST)
3985 {
3986 bool ret = true;
3987
3988 for (; args; args = TREE_CHAIN (args))
3989 if (TREE_VALUE (args)
3990 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3991 ret = false;
3992
3993 return ret;
3994 }
3995
3996 else if (TREE_CODE (args) != STRING_CST)
3997 gcc_unreachable ();
3998
3999 /* Handle multiple arguments separated by commas. */
4000 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4001
4002 while (next_optstr && *next_optstr != '\0')
4003 {
4004 char *p = next_optstr;
4005 char *orig_p = p;
4006 char *comma = strchr (next_optstr, ',');
4007 const char *opt_string;
4008 size_t len, opt_len;
4009 int opt;
4010 bool opt_set_p;
4011 char ch;
4012 unsigned i;
4013 enum ix86_opt_type type = ix86_opt_unknown;
4014 int mask = 0;
4015
4016 if (comma)
4017 {
4018 *comma = '\0';
4019 len = comma - next_optstr;
4020 next_optstr = comma + 1;
4021 }
4022 else
4023 {
4024 len = strlen (p);
4025 next_optstr = NULL;
4026 }
4027
4028 /* Recognize no-xxx. */
4029 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4030 {
4031 opt_set_p = false;
4032 p += 3;
4033 len -= 3;
4034 }
4035 else
4036 opt_set_p = true;
4037
4038 /* Find the option. */
4039 ch = *p;
4040 opt = N_OPTS;
4041 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4042 {
4043 type = attrs[i].type;
4044 opt_len = attrs[i].len;
4045 if (ch == attrs[i].string[0]
4046 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4047 && memcmp (p, attrs[i].string, opt_len) == 0)
4048 {
4049 opt = attrs[i].opt;
4050 mask = attrs[i].mask;
4051 opt_string = attrs[i].string;
4052 break;
4053 }
4054 }
4055
4056 /* Process the option. */
4057 if (opt == N_OPTS)
4058 {
4059 error ("attribute(target(\"%s\")) is unknown", orig_p);
4060 ret = false;
4061 }
4062
4063 else if (type == ix86_opt_isa)
4064 ix86_handle_option (opt, p, opt_set_p);
4065
4066 else if (type == ix86_opt_yes || type == ix86_opt_no)
4067 {
4068 if (type == ix86_opt_no)
4069 opt_set_p = !opt_set_p;
4070
4071 if (opt_set_p)
4072 target_flags |= mask;
4073 else
4074 target_flags &= ~mask;
4075 }
4076
4077 else if (type == ix86_opt_str)
4078 {
4079 if (p_strings[opt])
4080 {
4081 error ("option(\"%s\") was already specified", opt_string);
4082 ret = false;
4083 }
4084 else
4085 p_strings[opt] = xstrdup (p + opt_len);
4086 }
4087
4088 else
4089 gcc_unreachable ();
4090 }
4091
4092 return ret;
4093 }
4094
4095 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4096
4097 tree
4098 ix86_valid_target_attribute_tree (tree args)
4099 {
4100 const char *orig_arch_string = ix86_arch_string;
4101 const char *orig_tune_string = ix86_tune_string;
4102 const char *orig_fpmath_string = ix86_fpmath_string;
4103 int orig_tune_defaulted = ix86_tune_defaulted;
4104 int orig_arch_specified = ix86_arch_specified;
4105 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4106 tree t = NULL_TREE;
4107 int i;
4108 struct cl_target_option *def
4109 = TREE_TARGET_OPTION (target_option_default_node);
4110
4111 /* Process each of the options on the chain. */
4112 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4113 return NULL_TREE;
4114
4115 /* If the changed options are different from the default, rerun
4116 ix86_option_override_internal, and then save the options away.
4117 The string options are attribute options, and will be undone
4118 when we copy the save structure. */
4119 if (ix86_isa_flags != def->x_ix86_isa_flags
4120 || target_flags != def->x_target_flags
4121 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4122 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4123 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4124 {
4125 /* If we are using the default tune= or arch=, undo the string assigned,
4126 and use the default. */
4127 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4128 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4129 else if (!orig_arch_specified)
4130 ix86_arch_string = NULL;
4131
4132 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4133 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4134 else if (orig_tune_defaulted)
4135 ix86_tune_string = NULL;
4136
4137 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4138 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4139 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4140 else if (!TARGET_64BIT && TARGET_SSE)
4141 ix86_fpmath_string = "sse,387";
4142
4143 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4144 ix86_option_override_internal (false);
4145
4146 /* Add any builtin functions with the new isa if any. */
4147 ix86_add_new_builtins (ix86_isa_flags);
4148
4149 /* Save the current options unless we are validating options for
4150 #pragma. */
4151 t = build_target_option_node ();
4152
4153 ix86_arch_string = orig_arch_string;
4154 ix86_tune_string = orig_tune_string;
4155 ix86_fpmath_string = orig_fpmath_string;
4156
4157 /* Free up memory allocated to hold the strings */
4158 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4159 if (option_strings[i])
4160 free (option_strings[i]);
4161 }
4162
4163 return t;
4164 }
4165
4166 /* Hook to validate attribute((target("string"))). */
4167
4168 static bool
4169 ix86_valid_target_attribute_p (tree fndecl,
4170 tree ARG_UNUSED (name),
4171 tree args,
4172 int ARG_UNUSED (flags))
4173 {
4174 struct cl_target_option cur_target;
4175 bool ret = true;
4176 tree old_optimize = build_optimization_node ();
4177 tree new_target, new_optimize;
4178 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4179
4180 /* If the function changed the optimization levels as well as setting target
4181 options, start with the optimizations specified. */
4182 if (func_optimize && func_optimize != old_optimize)
4183 cl_optimization_restore (&global_options,
4184 TREE_OPTIMIZATION (func_optimize));
4185
4186 /* The target attributes may also change some optimization flags, so update
4187 the optimization options if necessary. */
4188 cl_target_option_save (&cur_target, &global_options);
4189 new_target = ix86_valid_target_attribute_tree (args);
4190 new_optimize = build_optimization_node ();
4191
4192 if (!new_target)
4193 ret = false;
4194
4195 else if (fndecl)
4196 {
4197 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4198
4199 if (old_optimize != new_optimize)
4200 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4201 }
4202
4203 cl_target_option_restore (&global_options, &cur_target);
4204
4205 if (old_optimize != new_optimize)
4206 cl_optimization_restore (&global_options,
4207 TREE_OPTIMIZATION (old_optimize));
4208
4209 return ret;
4210 }
4211
4212 \f
4213 /* Hook to determine if one function can safely inline another. */
4214
4215 static bool
4216 ix86_can_inline_p (tree caller, tree callee)
4217 {
4218 bool ret = false;
4219 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4220 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4221
4222 /* If callee has no option attributes, then it is ok to inline. */
4223 if (!callee_tree)
4224 ret = true;
4225
4226 /* If caller has no option attributes, but callee does then it is not ok to
4227 inline. */
4228 else if (!caller_tree)
4229 ret = false;
4230
4231 else
4232 {
4233 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4234 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4235
4236 /* The callee's isa options should be a subset of the caller's, i.e. an SSE4
4237 function can inline an SSE2 function but an SSE2 function can't inline an
4238 SSE4 function. */
4239 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4240 != callee_opts->x_ix86_isa_flags)
4241 ret = false;
4242
4243 /* See if we have the same non-isa options. */
4244 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4245 ret = false;
4246
4247 /* See if arch, tune, etc. are the same. */
4248 else if (caller_opts->arch != callee_opts->arch)
4249 ret = false;
4250
4251 else if (caller_opts->tune != callee_opts->tune)
4252 ret = false;
4253
4254 else if (caller_opts->fpmath != callee_opts->fpmath)
4255 ret = false;
4256
4257 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4258 ret = false;
4259
4260 else
4261 ret = true;
4262 }
4263
4264 return ret;
4265 }
4266
4267 \f
4268 /* Remember the last target of ix86_set_current_function. */
4269 static GTY(()) tree ix86_previous_fndecl;
4270
4271 /* Establish appropriate back-end context for processing the function
4272 FNDECL. The argument might be NULL to indicate processing at top
4273 level, outside of any function scope. */
4274 static void
4275 ix86_set_current_function (tree fndecl)
4276 {
4277 /* Only change the context if the function changes. This hook is called
4278 several times in the course of compiling a function, and we don't want to
4279 slow things down too much or call target_reinit when it isn't safe. */
4280 if (fndecl && fndecl != ix86_previous_fndecl)
4281 {
4282 tree old_tree = (ix86_previous_fndecl
4283 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4284 : NULL_TREE);
4285
4286 tree new_tree = (fndecl
4287 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4288 : NULL_TREE);
4289
4290 ix86_previous_fndecl = fndecl;
4291 if (old_tree == new_tree)
4292 ;
4293
4294 else if (new_tree)
4295 {
4296 cl_target_option_restore (&global_options,
4297 TREE_TARGET_OPTION (new_tree));
4298 target_reinit ();
4299 }
4300
4301 else if (old_tree)
4302 {
4303 struct cl_target_option *def
4304 = TREE_TARGET_OPTION (target_option_current_node);
4305
4306 cl_target_option_restore (&global_options, def);
4307 target_reinit ();
4308 }
4309 }
4310 }
4311
4312 \f
4313 /* Return true if this goes in large data/bss. */
4314
4315 static bool
4316 ix86_in_large_data_p (tree exp)
4317 {
4318 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4319 return false;
4320
4321 /* Functions are never large data. */
4322 if (TREE_CODE (exp) == FUNCTION_DECL)
4323 return false;
4324
4325 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4326 {
4327 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4328 if (strcmp (section, ".ldata") == 0
4329 || strcmp (section, ".lbss") == 0)
4330 return true;
4331 return false;
4332 }
4333 else
4334 {
4335 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4336
4337 /* If this is an incomplete type with size 0, then we can't put it
4338 in data because it might be too big when completed. */
4339 if (!size || size > ix86_section_threshold)
4340 return true;
4341 }
4342
4343 return false;
4344 }
4345
4346 /* Switch to the appropriate section for output of DECL.
4347 DECL is either a `VAR_DECL' node or a constant of some sort.
4348 RELOC indicates whether forming the initial value of DECL requires
4349 link-time relocations. */
4350
4351 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4352 ATTRIBUTE_UNUSED;
4353
4354 static section *
4355 x86_64_elf_select_section (tree decl, int reloc,
4356 unsigned HOST_WIDE_INT align)
4357 {
4358 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4359 && ix86_in_large_data_p (decl))
4360 {
4361 const char *sname = NULL;
4362 unsigned int flags = SECTION_WRITE;
4363 switch (categorize_decl_for_section (decl, reloc))
4364 {
4365 case SECCAT_DATA:
4366 sname = ".ldata";
4367 break;
4368 case SECCAT_DATA_REL:
4369 sname = ".ldata.rel";
4370 break;
4371 case SECCAT_DATA_REL_LOCAL:
4372 sname = ".ldata.rel.local";
4373 break;
4374 case SECCAT_DATA_REL_RO:
4375 sname = ".ldata.rel.ro";
4376 break;
4377 case SECCAT_DATA_REL_RO_LOCAL:
4378 sname = ".ldata.rel.ro.local";
4379 break;
4380 case SECCAT_BSS:
4381 sname = ".lbss";
4382 flags |= SECTION_BSS;
4383 break;
4384 case SECCAT_RODATA:
4385 case SECCAT_RODATA_MERGE_STR:
4386 case SECCAT_RODATA_MERGE_STR_INIT:
4387 case SECCAT_RODATA_MERGE_CONST:
4388 sname = ".lrodata";
4389 flags = 0;
4390 break;
4391 case SECCAT_SRODATA:
4392 case SECCAT_SDATA:
4393 case SECCAT_SBSS:
4394 gcc_unreachable ();
4395 case SECCAT_TEXT:
4396 case SECCAT_TDATA:
4397 case SECCAT_TBSS:
4398 /* We don't split these for the medium model.  Place them into
4399 default sections and hope for the best. */
4400 break;
4401 }
4402 if (sname)
4403 {
4404 /* We might get called with string constants, but get_named_section
4405 doesn't like them as they are not DECLs. Also, we need to set
4406 flags in that case. */
4407 if (!DECL_P (decl))
4408 return get_section (sname, flags, NULL);
4409 return get_named_section (decl, sname, reloc);
4410 }
4411 }
4412 return default_elf_select_section (decl, reloc, align);
4413 }
4414
4415 /* Build up a unique section name, expressed as a
4416 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4417 RELOC indicates whether the initial value of EXP requires
4418 link-time relocations. */
4419
4420 static void ATTRIBUTE_UNUSED
4421 x86_64_elf_unique_section (tree decl, int reloc)
4422 {
4423 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4424 && ix86_in_large_data_p (decl))
4425 {
4426 const char *prefix = NULL;
4427 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4428 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4429
4430 switch (categorize_decl_for_section (decl, reloc))
4431 {
4432 case SECCAT_DATA:
4433 case SECCAT_DATA_REL:
4434 case SECCAT_DATA_REL_LOCAL:
4435 case SECCAT_DATA_REL_RO:
4436 case SECCAT_DATA_REL_RO_LOCAL:
4437 prefix = one_only ? ".ld" : ".ldata";
4438 break;
4439 case SECCAT_BSS:
4440 prefix = one_only ? ".lb" : ".lbss";
4441 break;
4442 case SECCAT_RODATA:
4443 case SECCAT_RODATA_MERGE_STR:
4444 case SECCAT_RODATA_MERGE_STR_INIT:
4445 case SECCAT_RODATA_MERGE_CONST:
4446 prefix = one_only ? ".lr" : ".lrodata";
4447 break;
4448 case SECCAT_SRODATA:
4449 case SECCAT_SDATA:
4450 case SECCAT_SBSS:
4451 gcc_unreachable ();
4452 case SECCAT_TEXT:
4453 case SECCAT_TDATA:
4454 case SECCAT_TBSS:
4455 /* We don't split these for the medium model.  Place them into
4456 default sections and hope for the best. */
4457 break;
4458 }
4459 if (prefix)
4460 {
4461 const char *name, *linkonce;
4462 char *string;
4463
4464 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4465 name = targetm.strip_name_encoding (name);
4466
4467 /* If we're using one_only, then there needs to be a .gnu.linkonce
4468 prefix to the section name. */
4469 linkonce = one_only ? ".gnu.linkonce" : "";
4470
4471 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4472
4473 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4474 return;
4475 }
4476 }
4477 default_unique_section (decl, reloc);
4478 }
4479
4480 #ifdef COMMON_ASM_OP
4481 /* This says how to output assembler code to declare an
4482 uninitialized external linkage data object.
4483
4484 For medium model x86-64 we need to use the .largecomm directive for
4485 large objects. */
4486 void
4487 x86_elf_aligned_common (FILE *file,
4488 const char *name, unsigned HOST_WIDE_INT size,
4489 int align)
4490 {
4491 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4492 && size > (unsigned int)ix86_section_threshold)
4493 fputs (".largecomm\t", file);
4494 else
4495 fputs (COMMON_ASM_OP, file);
4496 assemble_name (file, name);
4497 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4498 size, align / BITS_PER_UNIT);
4499 }
4500 #endif
4501
4502 /* Utility function for targets to use in implementing
4503 ASM_OUTPUT_ALIGNED_BSS. */
4504
4505 void
4506 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4507 const char *name, unsigned HOST_WIDE_INT size,
4508 int align)
4509 {
4510 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4511 && size > (unsigned int)ix86_section_threshold)
4512 switch_to_section (get_named_section (decl, ".lbss", 0));
4513 else
4514 switch_to_section (bss_section);
4515 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4516 #ifdef ASM_DECLARE_OBJECT_NAME
4517 last_assemble_variable_decl = decl;
4518 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4519 #else
4520 /* The standard thing is to just output a label for the object. */
4521 ASM_OUTPUT_LABEL (file, name);
4522 #endif /* ASM_DECLARE_OBJECT_NAME */
4523 ASM_OUTPUT_SKIP (file, size ? size : 1);
4524 }
4525 \f
4526 static void
4527 ix86_option_optimization (int level, int size ATTRIBUTE_UNUSED)
4528 {
4529 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4530 make the problem with not enough registers even worse. */
4531 #ifdef INSN_SCHEDULING
4532 if (level > 1)
4533 flag_schedule_insns = 0;
4534 #endif
4535
4536 if (TARGET_MACHO)
4537 /* The Darwin libraries never set errno, so we might as well
4538 avoid calling them when that's the only reason we would. */
4539 flag_errno_math = 0;
4540
4541 /* The default values of these switches depend on TARGET_64BIT,
4542 which is not known at this moment.  Mark these values with 2 and
4543 let the user override them.  In case there is no command line
4544 option specifying them, we will set the defaults in
4545 ix86_option_override_internal. */
4546 if (optimize >= 1)
4547 flag_omit_frame_pointer = 2;
4548
4549 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4550 if (level > 1)
4551 flag_zee = 2;
4552
4553 flag_pcc_struct_return = 2;
4554 flag_asynchronous_unwind_tables = 2;
4555 flag_vect_cost_model = 1;
4556 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4557 SUBTARGET_OPTIMIZATION_OPTIONS;
4558 #endif
4559 }
4560
4561 /* Decide whether we must probe the stack before any space allocation
4562 on this target. It's essentially TARGET_STACK_PROBE except when
4563 -fstack-check causes the stack to be already probed differently. */
4564
4565 bool
4566 ix86_target_stack_probe (void)
4567 {
4568 /* Do not probe the stack twice if static stack checking is enabled. */
4569 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4570 return false;
4571
4572 return TARGET_STACK_PROBE;
4573 }
4574 \f
4575 /* Decide whether we can make a sibling call to a function. DECL is the
4576 declaration of the function being targeted by the call and EXP is the
4577 CALL_EXPR representing the call. */
4578
4579 static bool
4580 ix86_function_ok_for_sibcall (tree decl, tree exp)
4581 {
4582 tree type, decl_or_type;
4583 rtx a, b;
4584
4585 /* If we are generating position-independent code, we cannot sibcall
4586 optimize any indirect call, or a direct call to a global function,
4587 as the PLT requires %ebx be live. */
4588 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4589 return false;
4590
4591 /* If we need to align the outgoing stack, then sibcalling would
4592 unalign the stack, which may break the called function. */
4593 if (ix86_minimum_incoming_stack_boundary (true)
4594 < PREFERRED_STACK_BOUNDARY)
4595 return false;
4596
4597 if (decl)
4598 {
4599 decl_or_type = decl;
4600 type = TREE_TYPE (decl);
4601 }
4602 else
4603 {
4604 /* We're looking at the CALL_EXPR, we need the type of the function. */
4605 type = CALL_EXPR_FN (exp); /* pointer expression */
4606 type = TREE_TYPE (type); /* pointer type */
4607 type = TREE_TYPE (type); /* function type */
4608 decl_or_type = type;
4609 }
4610
4611 /* Check that the return value locations are the same.  For example,
4612 if we are returning floats on the 80387 register stack, we cannot
4613 make a sibcall from a function that doesn't return a float to a
4614 function that does or, conversely, from a function that does return
4615 a float to a function that doesn't; the necessary stack adjustment
4616 would not be executed. This is also the place we notice
4617 differences in the return value ABI. Note that it is ok for one
4618 of the functions to have void return type as long as the return
4619 value of the other is passed in a register. */
4620 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4621 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4622 cfun->decl, false);
4623 if (STACK_REG_P (a) || STACK_REG_P (b))
4624 {
4625 if (!rtx_equal_p (a, b))
4626 return false;
4627 }
4628 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4629 ;
4630 else if (!rtx_equal_p (a, b))
4631 return false;
4632
4633 if (TARGET_64BIT)
4634 {
4635 /* The SYSV ABI has more call-clobbered registers;
4636 disallow sibcalls from MS to SYSV. */
4637 if (cfun->machine->call_abi == MS_ABI
4638 && ix86_function_type_abi (type) == SYSV_ABI)
4639 return false;
4640 }
4641 else
4642 {
4643 /* If this call is indirect, we'll need to be able to use a
4644 call-clobbered register for the address of the target function.
4645 Make sure that all such registers are not used for passing
4646 parameters. Note that DLLIMPORT functions are indirect. */
4647 if (!decl
4648 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4649 {
4650 if (ix86_function_regparm (type, NULL) >= 3)
4651 {
4652 /* ??? Need to count the actual number of registers to be used,
4653 not the possible number of registers. Fix later. */
4654 return false;
4655 }
4656 }
4657 }
4658
4659 /* Otherwise okay. That also includes certain types of indirect calls. */
4660 return true;
4661 }
4662
4663 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4664 and "sseregparm" calling convention attributes;
4665 arguments as in struct attribute_spec.handler. */
4666
4667 static tree
4668 ix86_handle_cconv_attribute (tree *node, tree name,
4669 tree args,
4670 int flags ATTRIBUTE_UNUSED,
4671 bool *no_add_attrs)
4672 {
4673 if (TREE_CODE (*node) != FUNCTION_TYPE
4674 && TREE_CODE (*node) != METHOD_TYPE
4675 && TREE_CODE (*node) != FIELD_DECL
4676 && TREE_CODE (*node) != TYPE_DECL)
4677 {
4678 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4679 name);
4680 *no_add_attrs = true;
4681 return NULL_TREE;
4682 }
4683
4684 /* Can combine regparm with all attributes but fastcall. */
4685 if (is_attribute_p ("regparm", name))
4686 {
4687 tree cst;
4688
4689 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4690 {
4691 error ("fastcall and regparm attributes are not compatible");
4692 }
4693
4694 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4695 {
4696 error ("regparam and thiscall attributes are not compatible");
4697 }
4698
4699 cst = TREE_VALUE (args);
4700 if (TREE_CODE (cst) != INTEGER_CST)
4701 {
4702 warning (OPT_Wattributes,
4703 "%qE attribute requires an integer constant argument",
4704 name);
4705 *no_add_attrs = true;
4706 }
4707 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4708 {
4709 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4710 name, REGPARM_MAX);
4711 *no_add_attrs = true;
4712 }
4713
4714 return NULL_TREE;
4715 }
4716
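  /* Apart from regparm, handled above, the remaining calling-convention
     attributes have no effect in 64-bit mode; drop them, warning unless we
     are merely emulating the MS ABI.  */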
4717 if (TARGET_64BIT)
4718 {
4719 /* Do not warn when emulating the MS ABI. */
4720 if ((TREE_CODE (*node) != FUNCTION_TYPE
4721 && TREE_CODE (*node) != METHOD_TYPE)
4722 || ix86_function_type_abi (*node) != MS_ABI)
4723 warning (OPT_Wattributes, "%qE attribute ignored",
4724 name);
4725 *no_add_attrs = true;
4726 return NULL_TREE;
4727 }
4728
4729 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4730 if (is_attribute_p ("fastcall", name))
4731 {
4732 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4733 {
4734 error ("fastcall and cdecl attributes are not compatible");
4735 }
4736 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4737 {
4738 error ("fastcall and stdcall attributes are not compatible");
4739 }
4740 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4741 {
4742 error ("fastcall and regparm attributes are not compatible");
4743 }
4744 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4745 {
4746 error ("fastcall and thiscall attributes are not compatible");
4747 }
4748 }
4749
4750 /* Can combine stdcall with fastcall (redundant), regparm and
4751 sseregparm. */
4752 else if (is_attribute_p ("stdcall", name))
4753 {
4754 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4755 {
4756 error ("stdcall and cdecl attributes are not compatible");
4757 }
4758 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4759 {
4760 error ("stdcall and fastcall attributes are not compatible");
4761 }
4762 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4763 {
4764 error ("stdcall and thiscall attributes are not compatible");
4765 }
4766 }
4767
4768 /* Can combine cdecl with regparm and sseregparm. */
4769 else if (is_attribute_p ("cdecl", name))
4770 {
4771 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4772 {
4773 error ("stdcall and cdecl attributes are not compatible");
4774 }
4775 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4776 {
4777 error ("fastcall and cdecl attributes are not compatible");
4778 }
4779 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4780 {
4781 error ("cdecl and thiscall attributes are not compatible");
4782 }
4783 }
4784 else if (is_attribute_p ("thiscall", name))
4785 {
4786 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4787 warning (OPT_Wattributes, "%qE attribute is used for none class-method",
4788 name);
4789 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4790 {
4791 error ("stdcall and thiscall attributes are not compatible");
4792 }
4793 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4794 {
4795 error ("fastcall and thiscall attributes are not compatible");
4796 }
4797 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4798 {
4799 error ("cdecl and thiscall attributes are not compatible");
4800 }
4801 }
4802
4803 /* Can combine sseregparm with all attributes. */
4804
4805 return NULL_TREE;
4806 }
4807
4808 /* Return 0 if the attributes for two types are incompatible, 1 if they
4809 are compatible, and 2 if they are nearly compatible (which causes a
4810 warning to be generated). */
4811
4812 static int
4813 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4814 {
4815 /* Check for mismatch of non-default calling convention. */
4816 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4817
4818 if (TREE_CODE (type1) != FUNCTION_TYPE
4819 && TREE_CODE (type1) != METHOD_TYPE)
4820 return 1;
4821
4822 /* Check for mismatched fastcall/regparm types. */
4823 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4824 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4825 || (ix86_function_regparm (type1, NULL)
4826 != ix86_function_regparm (type2, NULL)))
4827 return 0;
4828
4829 /* Check for mismatched sseregparm types. */
4830 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4831 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4832 return 0;
4833
4834 /* Check for mismatched thiscall types. */
4835 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4836 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4837 return 0;
4838
4839 /* Check for mismatched return types (cdecl vs stdcall). */
4840 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4841 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4842 return 0;
4843
4844 return 1;
4845 }
4846 \f
4847 /* Return the regparm value for a function with the indicated TYPE and DECL.
4848 DECL may be NULL when calling function indirectly
4849 or considering a libcall. */
4850
4851 static int
4852 ix86_function_regparm (const_tree type, const_tree decl)
4853 {
4854 tree attr;
4855 int regparm;
4856
4857 if (TARGET_64BIT)
4858 return (ix86_function_type_abi (type) == SYSV_ABI
4859 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4860
4861 regparm = ix86_regparm;
4862 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4863 if (attr)
4864 {
4865 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4866 return regparm;
4867 }
4868
4869 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4870 return 2;
4871
4872 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4873 return 1;
4874
4875 /* Use register calling convention for local functions when possible. */
4876 if (decl
4877 && TREE_CODE (decl) == FUNCTION_DECL
4878 && optimize
4879 && !(profile_flag && !flag_fentry))
4880 {
4881 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4882 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4883 if (i && i->local)
4884 {
4885 int local_regparm, globals = 0, regno;
4886
4887 /* Make sure no regparm register is taken by a
4888 fixed register variable. */
4889 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4890 if (fixed_regs[local_regparm])
4891 break;
4892
4893 /* We don't want to use regparm(3) for nested functions as
4894 these use a static chain pointer in the third argument. */
4895 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4896 local_regparm = 2;
4897
4898 /* In 32-bit mode save a register for the split stack. */
4899 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
4900 local_regparm = 2;
4901
4902 /* Each fixed register usage increases register pressure,
4903 so fewer registers should be used for argument passing.
4904 This functionality can be overridden by an explicit
4905 regparm value. */
4906 for (regno = 0; regno <= DI_REG; regno++)
4907 if (fixed_regs[regno])
4908 globals++;
4909
4910 local_regparm
4911 = globals < local_regparm ? local_regparm - globals : 0;
4912
4913 if (local_regparm > regparm)
4914 regparm = local_regparm;
4915 }
4916 }
4917
4918 return regparm;
4919 }
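
/* Sketch of the 32-bit conventions resolved above (hypothetical
   declaration, shown only for illustration):

     int __attribute__ ((regparm (3))) f (int a, int b, int c);

   passes A, B and C in EAX, EDX and ECX.  fastcall corresponds to a
   regparm value of 2 (ECX, EDX) and thiscall to 1 (ECX).  */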
4920
4921 /* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4922 DFmode (2) arguments in SSE registers for a function with the
4923 indicated TYPE and DECL. DECL may be NULL when calling function
4924 indirectly or considering a libcall. Otherwise return 0. */
4925
4926 static int
4927 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4928 {
4929 gcc_assert (!TARGET_64BIT);
4930
4931 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4932 by the sseregparm attribute. */
4933 if (TARGET_SSEREGPARM
4934 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4935 {
4936 if (!TARGET_SSE)
4937 {
4938 if (warn)
4939 {
4940 if (decl)
4941 error ("Calling %qD with attribute sseregparm without "
4942 "SSE/SSE2 enabled", decl);
4943 else
4944 error ("Calling %qT with attribute sseregparm without "
4945 "SSE/SSE2 enabled", type);
4946 }
4947 return 0;
4948 }
4949
4950 return 2;
4951 }
4952
4953 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4954 (and DFmode for SSE2) arguments in SSE registers. */
4955 if (decl && TARGET_SSE_MATH && optimize
4956 && !(profile_flag && !flag_fentry))
4957 {
4958 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4959 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4960 if (i && i->local)
4961 return TARGET_SSE2 ? 2 : 1;
4962 }
4963
4964 return 0;
4965 }
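
/* Sketch (hypothetical usage, assuming -m32 -msse2):

     double __attribute__ ((sseregparm)) f (double x);

   makes the function above return 2, so X arrives in %xmm0 instead of
   on the stack; without SSE enabled the attribute is an error.  */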
4966
4967 /* Return true if EAX is live at the start of the function. Used by
4968 ix86_expand_prologue to determine if we need special help before
4969 calling allocate_stack_worker. */
4970
4971 static bool
4972 ix86_eax_live_at_start_p (void)
4973 {
4974 /* Cheat. Don't bother working forward from ix86_function_regparm
4975 to the function type to whether an actual argument is located in
4976 eax. Instead just look at cfg info, which is still close enough
4977 to correct at this point. This gives false positives for broken
4978 functions that might use uninitialized data that happens to be
4979 allocated in eax, but who cares? */
4980 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4981 }
4982
4983 /* Value is the number of bytes of arguments automatically
4984 popped when returning from a subroutine call.
4985 FUNDECL is the declaration node of the function (as a tree),
4986 FUNTYPE is the data type of the function (as a tree),
4987 or for a library call it is an identifier node for the subroutine name.
4988 SIZE is the number of bytes of arguments passed on the stack.
4989
4990 On the 80386, the RTD insn may be used to pop them if the number
4991 of args is fixed, but if the number is variable then the caller
4992 must pop them all. RTD can't be used for library calls now
4993 because the library is compiled with the Unix compiler.
4994 Use of RTD is a selectable option, since it is incompatible with
4995 standard Unix calling sequences. If the option is not selected,
4996 the caller must always pop the args.
4997
4998 The attribute stdcall is equivalent to RTD on a per module basis. */
4999
5000 static int
5001 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5002 {
5003 int rtd;
5004
5005 /* None of the 64-bit ABIs pop arguments. */
5006 if (TARGET_64BIT)
5007 return 0;
5008
5009 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5010
5011 /* Cdecl functions override -mrtd, and never pop the stack. */
5012 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
5013 {
5014 /* Stdcall and fastcall functions will pop the stack if not
5015 variable args. */
5016 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5017 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5018 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5019 rtd = 1;
5020
5021 if (rtd && ! stdarg_p (funtype))
5022 return size;
5023 }
5024
5025 /* Lose any fake structure return argument if it is passed on the stack. */
5026 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5027 && !KEEP_AGGREGATE_RETURN_POINTER)
5028 {
5029 int nregs = ix86_function_regparm (funtype, fundecl);
5030 if (nregs == 0)
5031 return GET_MODE_SIZE (Pmode);
5032 }
5033
5034 return 0;
5035 }
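
/* Sketch of the effect (hypothetical declaration, typical -m32 codegen):

     void __attribute__ ((stdcall)) f (int a, int b);

   is not variadic, so the function above returns 8 and F ends with
   "ret $8"; the caller pushes the arguments but does not pop them.  */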
5036 \f
5037 /* Argument support functions. */
5038
5039 /* Return true when register may be used to pass function parameters. */
5040 bool
5041 ix86_function_arg_regno_p (int regno)
5042 {
5043 int i;
5044 const int *parm_regs;
5045
5046 if (!TARGET_64BIT)
5047 {
5048 if (TARGET_MACHO)
5049 return (regno < REGPARM_MAX
5050 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5051 else
5052 return (regno < REGPARM_MAX
5053 || (TARGET_MMX && MMX_REGNO_P (regno)
5054 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5055 || (TARGET_SSE && SSE_REGNO_P (regno)
5056 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5057 }
5058
5059 if (TARGET_MACHO)
5060 {
5061 if (SSE_REGNO_P (regno) && TARGET_SSE)
5062 return true;
5063 }
5064 else
5065 {
5066 if (TARGET_SSE && SSE_REGNO_P (regno)
5067 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5068 return true;
5069 }
5070
5071 /* TODO: The function should depend on current function ABI but
5072 builtins.c would need updating then. Therefore we use the
5073 default ABI. */
5074
5075 /* RAX is used as hidden argument to va_arg functions. */
5076 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5077 return true;
5078
5079 if (ix86_abi == MS_ABI)
5080 parm_regs = x86_64_ms_abi_int_parameter_registers;
5081 else
5082 parm_regs = x86_64_int_parameter_registers;
5083 for (i = 0; i < (ix86_abi == MS_ABI
5084 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5085 if (regno == parm_regs[i])
5086 return true;
5087 return false;
5088 }
5089
5090 /* Return true if we do not know how to pass TYPE solely in registers. */
5091
5092 static bool
5093 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5094 {
5095 if (must_pass_in_stack_var_size_or_pad (mode, type))
5096 return true;
5097
5098 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5099 The layout_type routine is crafty and tries to trick us into passing
5100 currently unsupported vector types on the stack by using TImode. */
5101 return (!TARGET_64BIT && mode == TImode
5102 && type && TREE_CODE (type) != VECTOR_TYPE);
5103 }
5104
5105 /* Return the size, in bytes, of the area reserved for arguments passed
5106 in registers for the function represented by FNDECL, which depends on the
5107 ABI used. */
5108 int
5109 ix86_reg_parm_stack_space (const_tree fndecl)
5110 {
5111 enum calling_abi call_abi = SYSV_ABI;
5112 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5113 call_abi = ix86_function_abi (fndecl);
5114 else
5115 call_abi = ix86_function_type_abi (fndecl);
5116 if (call_abi == MS_ABI)
5117 return 32;
5118 return 0;
5119 }
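
/* The 32 bytes returned for MS_ABI above are the "home" (shadow) area
   that a Windows x64 caller always reserves for the four register
   arguments, e.g. (sketch of typical caller code, not emitted here):

     subq  $40, %rsp   # 32-byte shadow area + 8 to keep 16-byte alignment
     call  f  */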
5120
5121 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5122 call ABI used. */
5123 enum calling_abi
5124 ix86_function_type_abi (const_tree fntype)
5125 {
5126 if (TARGET_64BIT && fntype != NULL)
5127 {
5128 enum calling_abi abi = ix86_abi;
5129 if (abi == SYSV_ABI)
5130 {
5131 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5132 abi = MS_ABI;
5133 }
5134 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5135 abi = SYSV_ABI;
5136 return abi;
5137 }
5138 return ix86_abi;
5139 }
5140
5141 static bool
5142 ix86_function_ms_hook_prologue (const_tree fn)
5143 {
5144 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5145 {
5146 if (decl_function_context (fn) != NULL_TREE)
5147 error_at (DECL_SOURCE_LOCATION (fn),
5148 "ms_hook_prologue is not compatible with nested function");
5149 else
5150 return true;
5151 }
5152 return false;
5153 }
5154
5155 static enum calling_abi
5156 ix86_function_abi (const_tree fndecl)
5157 {
5158 if (! fndecl)
5159 return ix86_abi;
5160 return ix86_function_type_abi (TREE_TYPE (fndecl));
5161 }
5162
5163 /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the
5164 call ABI used. */
5165 enum calling_abi
5166 ix86_cfun_abi (void)
5167 {
5168 if (! cfun || ! TARGET_64BIT)
5169 return ix86_abi;
5170 return cfun->machine->call_abi;
5171 }
5172
5173 /* Write the extra assembler code needed to declare a function properly. */
5174
5175 void
5176 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5177 tree decl)
5178 {
5179 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5180
5181 if (is_ms_hook)
5182 {
5183 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5184 unsigned int filler_cc = 0xcccccccc;
5185
5186 for (i = 0; i < filler_count; i += 4)
5187 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5188 }
5189
5190 ASM_OUTPUT_LABEL (asm_out_file, fname);
5191
5192 /* Output magic byte marker, if hot-patch attribute is set. */
5193 if (is_ms_hook)
5194 {
5195 if (TARGET_64BIT)
5196 {
5197 /* leaq [%rsp + 0], %rsp */
5198 asm_fprintf (asm_out_file, ASM_BYTE
5199 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5200 }
5201 else
5202 {
5203 /* movl.s %edi, %edi
5204 push %ebp
5205 movl.s %esp, %ebp */
5206 asm_fprintf (asm_out_file, ASM_BYTE
5207 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5208 }
5209 }
5210 }
5211
5212 /* regclass.c */
5213 extern void init_regs (void);
5214
5215 /* Implementation of the call ABI switching target hook. The call register
5216 sets specific to FNDECL are selected. See also CONDITIONAL_REGISTER_USAGE
5217 for more details. */
5218 void
5219 ix86_call_abi_override (const_tree fndecl)
5220 {
5221 if (fndecl == NULL_TREE)
5222 cfun->machine->call_abi = ix86_abi;
5223 else
5224 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5225 }
5226
5227 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
5228 re-initialization of init_regs each time we switch function context since
5229 this is needed only during RTL expansion. */
5230 static void
5231 ix86_maybe_switch_abi (void)
5232 {
5233 if (TARGET_64BIT
5234 && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5235 reinit_regs ();
5236 }
5237
5238 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5239 for a call to a function whose data type is FNTYPE.
5240 For a library call, FNTYPE is 0. */
5241
5242 void
5243 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5244 tree fntype, /* tree ptr for function decl */
5245 rtx libname, /* SYMBOL_REF of library name or 0 */
5246 tree fndecl)
5247 {
5248 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5249 memset (cum, 0, sizeof (*cum));
5250
5251 if (fndecl)
5252 cum->call_abi = ix86_function_abi (fndecl);
5253 else
5254 cum->call_abi = ix86_function_type_abi (fntype);
5255 /* Set up the number of registers to use for passing arguments. */
5256
5257 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5258 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5259 "or subtarget optimization implying it");
5260 cum->nregs = ix86_regparm;
5261 if (TARGET_64BIT)
5262 {
5263 cum->nregs = (cum->call_abi == SYSV_ABI
5264 ? X86_64_REGPARM_MAX
5265 : X86_64_MS_REGPARM_MAX);
5266 }
5267 if (TARGET_SSE)
5268 {
5269 cum->sse_nregs = SSE_REGPARM_MAX;
5270 if (TARGET_64BIT)
5271 {
5272 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5273 ? X86_64_SSE_REGPARM_MAX
5274 : X86_64_MS_SSE_REGPARM_MAX);
5275 }
5276 }
5277 if (TARGET_MMX)
5278 cum->mmx_nregs = MMX_REGPARM_MAX;
5279 cum->warn_avx = true;
5280 cum->warn_sse = true;
5281 cum->warn_mmx = true;
5282
5283 /* Because types might mismatch between caller and callee, we need to
5284 use the actual type of the function for local calls.
5285 FIXME: cgraph_analyze can be told to actually record if function uses
5286 va_start so for local functions maybe_vaarg can be made aggressive
5287 helping K&R code.
5288 FIXME: once the type system is fixed, we won't need this code anymore. */
5289 if (i && i->local)
5290 fntype = TREE_TYPE (fndecl);
5291 cum->maybe_vaarg = (fntype
5292 ? (!prototype_p (fntype) || stdarg_p (fntype))
5293 : !libname);
5294
5295 if (!TARGET_64BIT)
5296 {
5297 /* If there are variable arguments, then we won't pass anything
5298 in registers in 32-bit mode. */
5299 if (stdarg_p (fntype))
5300 {
5301 cum->nregs = 0;
5302 cum->sse_nregs = 0;
5303 cum->mmx_nregs = 0;
5304 cum->warn_avx = 0;
5305 cum->warn_sse = 0;
5306 cum->warn_mmx = 0;
5307 return;
5308 }
5309
5310 /* Use ecx and edx registers if function has fastcall attribute,
5311 else look for regparm information. */
5312 if (fntype)
5313 {
5314 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5315 {
5316 cum->nregs = 1;
5317 cum->fastcall = 1; /* Same first register as in fastcall. */
5318 }
5319 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5320 {
5321 cum->nregs = 2;
5322 cum->fastcall = 1;
5323 }
5324 else
5325 cum->nregs = ix86_function_regparm (fntype, fndecl);
5326 }
5327
5328 /* Set up the number of SSE registers used for passing SFmode
5329 and DFmode arguments. Warn for mismatching ABI. */
5330 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5331 }
5332 }
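
/* Sketch (hypothetical -m32 declaration): for

     int __attribute__ ((fastcall)) f (int a, int b, int c);

   the code above sets cum->nregs = 2 and cum->fastcall = 1, so A and B
   are later assigned to ECX and EDX by function_arg_32 and C is pushed
   on the stack.  */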
5333
5334 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5335 But in the case of vector types, it is some vector mode.
5336
5337 When we have only some of our vector isa extensions enabled, then there
5338 are some modes for which vector_mode_supported_p is false. For these
5339 modes, the generic vector support in gcc will choose some non-vector mode
5340 in order to implement the type. By computing the natural mode, we'll
5341 select the proper ABI location for the operand and not depend on whatever
5342 the middle-end decides to do with these vector types.
5343
5344 The middle-end can't deal with vector types larger than 16 bytes. In
5345 this case, we return the original mode and warn about the ABI change if
5346 CUM isn't NULL. */
5347
5348 static enum machine_mode
5349 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5350 {
5351 enum machine_mode mode = TYPE_MODE (type);
5352
5353 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5354 {
5355 HOST_WIDE_INT size = int_size_in_bytes (type);
5356 if ((size == 8 || size == 16 || size == 32)
5357 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5358 && TYPE_VECTOR_SUBPARTS (type) > 1)
5359 {
5360 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5361
5362 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5363 mode = MIN_MODE_VECTOR_FLOAT;
5364 else
5365 mode = MIN_MODE_VECTOR_INT;
5366
5367 /* Get the mode which has this inner mode and number of units. */
5368 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5369 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5370 && GET_MODE_INNER (mode) == innermode)
5371 {
5372 if (size == 32 && !TARGET_AVX)
5373 {
5374 static bool warnedavx;
5375
5376 if (cum
5377 && !warnedavx
5378 && cum->warn_avx)
5379 {
5380 warnedavx = true;
5381 warning (0, "AVX vector argument without AVX "
5382 "enabled changes the ABI");
5383 }
5384 return TYPE_MODE (type);
5385 }
5386 else
5387 return mode;
5388 }
5389
5390 gcc_unreachable ();
5391 }
5392 }
5393
5394 return mode;
5395 }
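
/* Sketch (assumed example): a generic vector type such as

     typedef int v4si __attribute__ ((vector_size (16)));

   gets the natural mode V4SImode here even when SSE is disabled, so the
   psABI argument location stays stable; only 32-byte vectors without
   AVX fall back to TYPE_MODE and trigger the warning above.  */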
5396
5397 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5398 this may not agree with the mode that the type system has chosen for the
5399 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5400 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5401
5402 static rtx
5403 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5404 unsigned int regno)
5405 {
5406 rtx tmp;
5407
5408 if (orig_mode != BLKmode)
5409 tmp = gen_rtx_REG (orig_mode, regno);
5410 else
5411 {
5412 tmp = gen_rtx_REG (mode, regno);
5413 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5414 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5415 }
5416
5417 return tmp;
5418 }
5419
5420 /* x86-64 register passing implementation. See the x86-64 psABI for details.
5421 The goal of this code is to classify each eightbyte of an incoming argument
5422 by register class and assign registers accordingly. */
5423
5424 /* Return the union class of CLASS1 and CLASS2.
5425 See the x86-64 PS ABI for details. */
5426
5427 static enum x86_64_reg_class
5428 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5429 {
5430 /* Rule #1: If both classes are equal, this is the resulting class. */
5431 if (class1 == class2)
5432 return class1;
5433
5434 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5435 the other class. */
5436 if (class1 == X86_64_NO_CLASS)
5437 return class2;
5438 if (class2 == X86_64_NO_CLASS)
5439 return class1;
5440
5441 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5442 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5443 return X86_64_MEMORY_CLASS;
5444
5445 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5446 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5447 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5448 return X86_64_INTEGERSI_CLASS;
5449 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5450 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5451 return X86_64_INTEGER_CLASS;
5452
5453 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5454 MEMORY is used. */
5455 if (class1 == X86_64_X87_CLASS
5456 || class1 == X86_64_X87UP_CLASS
5457 || class1 == X86_64_COMPLEX_X87_CLASS
5458 || class2 == X86_64_X87_CLASS
5459 || class2 == X86_64_X87UP_CLASS
5460 || class2 == X86_64_COMPLEX_X87_CLASS)
5461 return X86_64_MEMORY_CLASS;
5462
5463 /* Rule #6: Otherwise class SSE is used. */
5464 return X86_64_SSE_CLASS;
5465 }
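
/* Sketch of the merge rules on a concrete case (assumed example): for

     union u { int i; float f; };

   the single eightbyte merges X86_64_INTEGERSI_CLASS with
   X86_64_SSESF_CLASS, which rule #4 resolves to INTEGERSI, so the union
   is passed in a general register.  */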
5466
5467 /* Classify the argument of type TYPE and mode MODE.
5468 CLASSES will be filled by the register class used to pass each word
5469 of the operand. The number of words is returned. In case the parameter
5470 should be passed in memory, 0 is returned. As a special case for zero
5471 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5472
5473 BIT_OFFSET is used internally for handling records and specifies the
5474 offset in bits modulo 256 to avoid overflow cases.
5475
5476 See the x86-64 PS ABI for details.
5477 */
5478
5479 static int
5480 classify_argument (enum machine_mode mode, const_tree type,
5481 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5482 {
5483 HOST_WIDE_INT bytes =
5484 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5485 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5486
5487 /* Variable sized entities are always passed/returned in memory. */
5488 if (bytes < 0)
5489 return 0;
5490
5491 if (mode != VOIDmode
5492 && targetm.calls.must_pass_in_stack (mode, type))
5493 return 0;
5494
5495 if (type && AGGREGATE_TYPE_P (type))
5496 {
5497 int i;
5498 tree field;
5499 enum x86_64_reg_class subclasses[MAX_CLASSES];
5500
5501 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5502 if (bytes > 32)
5503 return 0;
5504
5505 for (i = 0; i < words; i++)
5506 classes[i] = X86_64_NO_CLASS;
5507
5508 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5509 signal the memory class, so handle it as a special case. */
5510 if (!words)
5511 {
5512 classes[0] = X86_64_NO_CLASS;
5513 return 1;
5514 }
5515
5516 /* Classify each field of record and merge classes. */
5517 switch (TREE_CODE (type))
5518 {
5519 case RECORD_TYPE:
5520 /* And now merge the fields of structure. */
5521 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5522 {
5523 if (TREE_CODE (field) == FIELD_DECL)
5524 {
5525 int num;
5526
5527 if (TREE_TYPE (field) == error_mark_node)
5528 continue;
5529
5530 /* Bitfields are always classified as integer. Handle them
5531 early, since later code would consider them to be
5532 misaligned integers. */
5533 if (DECL_BIT_FIELD (field))
5534 {
5535 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5536 i < ((int_bit_position (field) + (bit_offset % 64))
5537 + tree_low_cst (DECL_SIZE (field), 0)
5538 + 63) / 8 / 8; i++)
5539 classes[i] =
5540 merge_classes (X86_64_INTEGER_CLASS,
5541 classes[i]);
5542 }
5543 else
5544 {
5545 int pos;
5546
5547 type = TREE_TYPE (field);
5548
5549 /* Flexible array member is ignored. */
5550 if (TYPE_MODE (type) == BLKmode
5551 && TREE_CODE (type) == ARRAY_TYPE
5552 && TYPE_SIZE (type) == NULL_TREE
5553 && TYPE_DOMAIN (type) != NULL_TREE
5554 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5555 == NULL_TREE))
5556 {
5557 static bool warned;
5558
5559 if (!warned && warn_psabi)
5560 {
5561 warned = true;
5562 inform (input_location,
5563 "The ABI of passing struct with"
5564 " a flexible array member has"
5565 " changed in GCC 4.4");
5566 }
5567 continue;
5568 }
5569 num = classify_argument (TYPE_MODE (type), type,
5570 subclasses,
5571 (int_bit_position (field)
5572 + bit_offset) % 256);
5573 if (!num)
5574 return 0;
5575 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5576 for (i = 0; i < num && (i + pos) < words; i++)
5577 classes[i + pos] =
5578 merge_classes (subclasses[i], classes[i + pos]);
5579 }
5580 }
5581 }
5582 break;
5583
5584 case ARRAY_TYPE:
5585 /* Arrays are handled as small records. */
5586 {
5587 int num;
5588 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5589 TREE_TYPE (type), subclasses, bit_offset);
5590 if (!num)
5591 return 0;
5592
5593 /* The partial classes are now full classes. */
5594 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5595 subclasses[0] = X86_64_SSE_CLASS;
5596 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5597 && !((bit_offset % 64) == 0 && bytes == 4))
5598 subclasses[0] = X86_64_INTEGER_CLASS;
5599
5600 for (i = 0; i < words; i++)
5601 classes[i] = subclasses[i % num];
5602
5603 break;
5604 }
5605 case UNION_TYPE:
5606 case QUAL_UNION_TYPE:
5607 /* Unions are similar to RECORD_TYPE but offset is always 0.
5608 */
5609 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5610 {
5611 if (TREE_CODE (field) == FIELD_DECL)
5612 {
5613 int num;
5614
5615 if (TREE_TYPE (field) == error_mark_node)
5616 continue;
5617
5618 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5619 TREE_TYPE (field), subclasses,
5620 bit_offset);
5621 if (!num)
5622 return 0;
5623 for (i = 0; i < num; i++)
5624 classes[i] = merge_classes (subclasses[i], classes[i]);
5625 }
5626 }
5627 break;
5628
5629 default:
5630 gcc_unreachable ();
5631 }
5632
5633 if (words > 2)
5634 {
5635 /* When the size is larger than 16 bytes, if the first class isn't
5636 X86_64_SSE_CLASS or any of the others isn't
5637 X86_64_SSEUP_CLASS, everything should be passed in
5638 memory. */
5639 if (classes[0] != X86_64_SSE_CLASS)
5640 return 0;
5641
5642 for (i = 1; i < words; i++)
5643 if (classes[i] != X86_64_SSEUP_CLASS)
5644 return 0;
5645 }
5646
5647 /* Final merger cleanup. */
5648 for (i = 0; i < words; i++)
5649 {
5650 /* If one class is MEMORY, everything should be passed in
5651 memory. */
5652 if (classes[i] == X86_64_MEMORY_CLASS)
5653 return 0;
5654
5655 /* The X86_64_SSEUP_CLASS should be always preceded by
5656 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5657 if (classes[i] == X86_64_SSEUP_CLASS
5658 && classes[i - 1] != X86_64_SSE_CLASS
5659 && classes[i - 1] != X86_64_SSEUP_CLASS)
5660 {
5661 /* The first one should never be X86_64_SSEUP_CLASS. */
5662 gcc_assert (i != 0);
5663 classes[i] = X86_64_SSE_CLASS;
5664 }
5665
5666 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5667 everything should be passed in memory. */
5668 if (classes[i] == X86_64_X87UP_CLASS
5669 && (classes[i - 1] != X86_64_X87_CLASS))
5670 {
5671 static bool warned;
5672
5673 /* The first one should never be X86_64_X87UP_CLASS. */
5674 gcc_assert (i != 0);
5675 if (!warned && warn_psabi)
5676 {
5677 warned = true;
5678 inform (input_location,
5679 "The ABI of passing union with long double"
5680 " has changed in GCC 4.4");
5681 }
5682 return 0;
5683 }
5684 }
5685 return words;
5686 }
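
/* Worked example (a sketch following the psABI, not derived from a
   particular call site): for

     struct s { double d; long l; };

   classify_argument returns 2 with classes[0] = X86_64_SSEDF_CLASS and
   classes[1] = X86_64_INTEGER_CLASS, so the struct travels in one SSE
   register and one general register.  A struct of three doubles is 24
   bytes with classes[0] != SSE_CLASS, so it is passed in memory.  */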
5687
5688 /* Compute the alignment needed. We align all types to their natural
5689 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5690 if (mode != VOIDmode && mode != BLKmode)
5691 {
5692 int mode_alignment = GET_MODE_BITSIZE (mode);
5693
5694 if (mode == XFmode)
5695 mode_alignment = 128;
5696 else if (mode == XCmode)
5697 mode_alignment = 256;
5698 if (COMPLEX_MODE_P (mode))
5699 mode_alignment /= 2;
5700 /* Misaligned fields are always returned in memory. */
5701 if (bit_offset % mode_alignment)
5702 return 0;
5703 }
5704
5705 /* For V1xx modes, just use the base mode. */
5706 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5707 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5708 mode = GET_MODE_INNER (mode);
5709
5710 /* Classification of atomic types. */
5711 switch (mode)
5712 {
5713 case SDmode:
5714 case DDmode:
5715 classes[0] = X86_64_SSE_CLASS;
5716 return 1;
5717 case TDmode:
5718 classes[0] = X86_64_SSE_CLASS;
5719 classes[1] = X86_64_SSEUP_CLASS;
5720 return 2;
5721 case DImode:
5722 case SImode:
5723 case HImode:
5724 case QImode:
5725 case CSImode:
5726 case CHImode:
5727 case CQImode:
5728 {
5729 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5730
5731 if (size <= 32)
5732 {
5733 classes[0] = X86_64_INTEGERSI_CLASS;
5734 return 1;
5735 }
5736 else if (size <= 64)
5737 {
5738 classes[0] = X86_64_INTEGER_CLASS;
5739 return 1;
5740 }
5741 else if (size <= 64+32)
5742 {
5743 classes[0] = X86_64_INTEGER_CLASS;
5744 classes[1] = X86_64_INTEGERSI_CLASS;
5745 return 2;
5746 }
5747 else if (size <= 64+64)
5748 {
5749 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5750 return 2;
5751 }
5752 else
5753 gcc_unreachable ();
5754 }
5755 case CDImode:
5756 case TImode:
5757 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5758 return 2;
5759 case COImode:
5760 case OImode:
5761 /* OImode shouldn't be used directly. */
5762 gcc_unreachable ();
5763 case CTImode:
5764 return 0;
5765 case SFmode:
5766 if (!(bit_offset % 64))
5767 classes[0] = X86_64_SSESF_CLASS;
5768 else
5769 classes[0] = X86_64_SSE_CLASS;
5770 return 1;
5771 case DFmode:
5772 classes[0] = X86_64_SSEDF_CLASS;
5773 return 1;
5774 case XFmode:
5775 classes[0] = X86_64_X87_CLASS;
5776 classes[1] = X86_64_X87UP_CLASS;
5777 return 2;
5778 case TFmode:
5779 classes[0] = X86_64_SSE_CLASS;
5780 classes[1] = X86_64_SSEUP_CLASS;
5781 return 2;
5782 case SCmode:
5783 classes[0] = X86_64_SSE_CLASS;
5784 if (!(bit_offset % 64))
5785 return 1;
5786 else
5787 {
5788 static bool warned;
5789
5790 if (!warned && warn_psabi)
5791 {
5792 warned = true;
5793 inform (input_location,
5794 "The ABI of passing structure with complex float"
5795 " member has changed in GCC 4.4");
5796 }
5797 classes[1] = X86_64_SSESF_CLASS;
5798 return 2;
5799 }
5800 case DCmode:
5801 classes[0] = X86_64_SSEDF_CLASS;
5802 classes[1] = X86_64_SSEDF_CLASS;
5803 return 2;
5804 case XCmode:
5805 classes[0] = X86_64_COMPLEX_X87_CLASS;
5806 return 1;
5807 case TCmode:
5808 /* This mode is larger than 16 bytes. */
5809 return 0;
5810 case V8SFmode:
5811 case V8SImode:
5812 case V32QImode:
5813 case V16HImode:
5814 case V4DFmode:
5815 case V4DImode:
5816 classes[0] = X86_64_SSE_CLASS;
5817 classes[1] = X86_64_SSEUP_CLASS;
5818 classes[2] = X86_64_SSEUP_CLASS;
5819 classes[3] = X86_64_SSEUP_CLASS;
5820 return 4;
5821 case V4SFmode:
5822 case V4SImode:
5823 case V16QImode:
5824 case V8HImode:
5825 case V2DFmode:
5826 case V2DImode:
5827 classes[0] = X86_64_SSE_CLASS;
5828 classes[1] = X86_64_SSEUP_CLASS;
5829 return 2;
5830 case V1TImode:
5831 case V1DImode:
5832 case V2SFmode:
5833 case V2SImode:
5834 case V4HImode:
5835 case V8QImode:
5836 classes[0] = X86_64_SSE_CLASS;
5837 return 1;
5838 case BLKmode:
5839 case VOIDmode:
5840 return 0;
5841 default:
5842 gcc_assert (VECTOR_MODE_P (mode));
5843
5844 if (bytes > 16)
5845 return 0;
5846
5847 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5848
5849 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5850 classes[0] = X86_64_INTEGERSI_CLASS;
5851 else
5852 classes[0] = X86_64_INTEGER_CLASS;
5853 classes[1] = X86_64_INTEGER_CLASS;
5854 return 1 + (bytes > 8);
5855 }
5856 }
5857
5858 /* Examine the argument and set the number of registers required in each
5859 class. Return 0 iff the parameter should be passed in memory. */
5860 static int
5861 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5862 int *int_nregs, int *sse_nregs)
5863 {
5864 enum x86_64_reg_class regclass[MAX_CLASSES];
5865 int n = classify_argument (mode, type, regclass, 0);
5866
5867 *int_nregs = 0;
5868 *sse_nregs = 0;
5869 if (!n)
5870 return 0;
5871 for (n--; n >= 0; n--)
5872 switch (regclass[n])
5873 {
5874 case X86_64_INTEGER_CLASS:
5875 case X86_64_INTEGERSI_CLASS:
5876 (*int_nregs)++;
5877 break;
5878 case X86_64_SSE_CLASS:
5879 case X86_64_SSESF_CLASS:
5880 case X86_64_SSEDF_CLASS:
5881 (*sse_nregs)++;
5882 break;
5883 case X86_64_NO_CLASS:
5884 case X86_64_SSEUP_CLASS:
5885 break;
5886 case X86_64_X87_CLASS:
5887 case X86_64_X87UP_CLASS:
5888 if (!in_return)
5889 return 0;
5890 break;
5891 case X86_64_COMPLEX_X87_CLASS:
5892 return in_return ? 2 : 0;
5893 case X86_64_MEMORY_CLASS:
5894 gcc_unreachable ();
5895 }
5896 return 1;
5897 }
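
/* Sketch (assumed example): for the struct { double; long } case above,
   examine_argument sets *INT_NREGS = 1 and *SSE_NREGS = 1 and returns
   nonzero; when classify_argument yields 0 the counts stay 0 and the
   caller falls back to passing the value in memory.  */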
5898
5899 /* Construct container for the argument used by GCC interface. See
5900 FUNCTION_ARG for the detailed description. */
5901
5902 static rtx
5903 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5904 const_tree type, int in_return, int nintregs, int nsseregs,
5905 const int *intreg, int sse_regno)
5906 {
5907 /* The following variables hold the static issued_error state. */
5908 static bool issued_sse_arg_error;
5909 static bool issued_sse_ret_error;
5910 static bool issued_x87_ret_error;
5911
5912 enum machine_mode tmpmode;
5913 int bytes =
5914 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5915 enum x86_64_reg_class regclass[MAX_CLASSES];
5916 int n;
5917 int i;
5918 int nexps = 0;
5919 int needed_sseregs, needed_intregs;
5920 rtx exp[MAX_CLASSES];
5921 rtx ret;
5922
5923 n = classify_argument (mode, type, regclass, 0);
5924 if (!n)
5925 return NULL;
5926 if (!examine_argument (mode, type, in_return, &needed_intregs,
5927 &needed_sseregs))
5928 return NULL;
5929 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5930 return NULL;
5931
5932 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5933 some less clueful developer tries to use floating-point anyway. */
5934 if (needed_sseregs && !TARGET_SSE)
5935 {
5936 if (in_return)
5937 {
5938 if (!issued_sse_ret_error)
5939 {
5940 error ("SSE register return with SSE disabled");
5941 issued_sse_ret_error = true;
5942 }
5943 }
5944 else if (!issued_sse_arg_error)
5945 {
5946 error ("SSE register argument with SSE disabled");
5947 issued_sse_arg_error = true;
5948 }
5949 return NULL;
5950 }
5951
5952 /* Likewise, error if the ABI requires us to return values in the
5953 x87 registers and the user specified -mno-80387. */
5954 if (!TARGET_80387 && in_return)
5955 for (i = 0; i < n; i++)
5956 if (regclass[i] == X86_64_X87_CLASS
5957 || regclass[i] == X86_64_X87UP_CLASS
5958 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5959 {
5960 if (!issued_x87_ret_error)
5961 {
5962 error ("x87 register return with x87 disabled");
5963 issued_x87_ret_error = true;
5964 }
5965 return NULL;
5966 }
5967
5968 /* First construct simple cases. Avoid SCmode, since we want to use
5969 single register to pass this type. */
5970 if (n == 1 && mode != SCmode)
5971 switch (regclass[0])
5972 {
5973 case X86_64_INTEGER_CLASS:
5974 case X86_64_INTEGERSI_CLASS:
5975 return gen_rtx_REG (mode, intreg[0]);
5976 case X86_64_SSE_CLASS:
5977 case X86_64_SSESF_CLASS:
5978 case X86_64_SSEDF_CLASS:
5979 if (mode != BLKmode)
5980 return gen_reg_or_parallel (mode, orig_mode,
5981 SSE_REGNO (sse_regno));
5982 break;
5983 case X86_64_X87_CLASS:
5984 case X86_64_COMPLEX_X87_CLASS:
5985 return gen_rtx_REG (mode, FIRST_STACK_REG);
5986 case X86_64_NO_CLASS:
5987 /* Zero sized array, struct or class. */
5988 return NULL;
5989 default:
5990 gcc_unreachable ();
5991 }
5992 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5993 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5994 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5995 if (n == 4
5996 && regclass[0] == X86_64_SSE_CLASS
5997 && regclass[1] == X86_64_SSEUP_CLASS
5998 && regclass[2] == X86_64_SSEUP_CLASS
5999 && regclass[3] == X86_64_SSEUP_CLASS
6000 && mode != BLKmode)
6001 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6002
6003 if (n == 2
6004 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6005 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6006 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6007 && regclass[1] == X86_64_INTEGER_CLASS
6008 && (mode == CDImode || mode == TImode || mode == TFmode)
6009 && intreg[0] + 1 == intreg[1])
6010 return gen_rtx_REG (mode, intreg[0]);
6011
6012 /* Otherwise figure out the entries of the PARALLEL. */
6013 for (i = 0; i < n; i++)
6014 {
6015 int pos;
6016
6017 switch (regclass[i])
6018 {
6019 case X86_64_NO_CLASS:
6020 break;
6021 case X86_64_INTEGER_CLASS:
6022 case X86_64_INTEGERSI_CLASS:
6023 /* Merge TImodes on aligned occasions here too. */
6024 if (i * 8 + 8 > bytes)
6025 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6026 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6027 tmpmode = SImode;
6028 else
6029 tmpmode = DImode;
6030 /* We've requested 24 bytes for which we have no mode. Use DImode. */
6031 if (tmpmode == BLKmode)
6032 tmpmode = DImode;
6033 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6034 gen_rtx_REG (tmpmode, *intreg),
6035 GEN_INT (i*8));
6036 intreg++;
6037 break;
6038 case X86_64_SSESF_CLASS:
6039 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6040 gen_rtx_REG (SFmode,
6041 SSE_REGNO (sse_regno)),
6042 GEN_INT (i*8));
6043 sse_regno++;
6044 break;
6045 case X86_64_SSEDF_CLASS:
6046 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6047 gen_rtx_REG (DFmode,
6048 SSE_REGNO (sse_regno)),
6049 GEN_INT (i*8));
6050 sse_regno++;
6051 break;
6052 case X86_64_SSE_CLASS:
6053 pos = i;
6054 switch (n)
6055 {
6056 case 1:
6057 tmpmode = DImode;
6058 break;
6059 case 2:
6060 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6061 {
6062 tmpmode = TImode;
6063 i++;
6064 }
6065 else
6066 tmpmode = DImode;
6067 break;
6068 case 4:
6069 gcc_assert (i == 0
6070 && regclass[1] == X86_64_SSEUP_CLASS
6071 && regclass[2] == X86_64_SSEUP_CLASS
6072 && regclass[3] == X86_64_SSEUP_CLASS);
6073 tmpmode = OImode;
6074 i += 3;
6075 break;
6076 default:
6077 gcc_unreachable ();
6078 }
6079 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6080 gen_rtx_REG (tmpmode,
6081 SSE_REGNO (sse_regno)),
6082 GEN_INT (pos*8));
6083 sse_regno++;
6084 break;
6085 default:
6086 gcc_unreachable ();
6087 }
6088 }
6089
6090 /* Empty aligned struct, union or class. */
6091 if (nexps == 0)
6092 return NULL;
6093
6094 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6095 for (i = 0; i < nexps; i++)
6096 XVECEXP (ret, 0, i) = exp [i];
6097 return ret;
6098 }
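
/* Sketch of the resulting RTL (approximate shape, for illustration):
   passing struct { double d; long l; } as the first argument builds

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. the double at byte offset 0 in %xmm0 and the long at offset 8
   in %rdi.  */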
6099
6100 /* Update the data in CUM to advance over an argument of mode MODE
6101 and data type TYPE. (TYPE is null for libcalls where that information
6102 may not be available.) */
6103
6104 static void
6105 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6106 const_tree type, HOST_WIDE_INT bytes,
6107 HOST_WIDE_INT words)
6108 {
6109 switch (mode)
6110 {
6111 default:
6112 break;
6113
6114 case BLKmode:
6115 if (bytes < 0)
6116 break;
6117 /* FALLTHRU */
6118
6119 case DImode:
6120 case SImode:
6121 case HImode:
6122 case QImode:
6123 cum->words += words;
6124 cum->nregs -= words;
6125 cum->regno += words;
6126
6127 if (cum->nregs <= 0)
6128 {
6129 cum->nregs = 0;
6130 cum->regno = 0;
6131 }
6132 break;
6133
6134 case OImode:
6135 /* OImode shouldn't be used directly. */
6136 gcc_unreachable ();
6137
6138 case DFmode:
6139 if (cum->float_in_sse < 2)
6140 break;
6141 case SFmode:
6142 if (cum->float_in_sse < 1)
6143 break;
6144 /* FALLTHRU */
6145
6146 case V8SFmode:
6147 case V8SImode:
6148 case V32QImode:
6149 case V16HImode:
6150 case V4DFmode:
6151 case V4DImode:
6152 case TImode:
6153 case V16QImode:
6154 case V8HImode:
6155 case V4SImode:
6156 case V2DImode:
6157 case V4SFmode:
6158 case V2DFmode:
6159 if (!type || !AGGREGATE_TYPE_P (type))
6160 {
6161 cum->sse_words += words;
6162 cum->sse_nregs -= 1;
6163 cum->sse_regno += 1;
6164 if (cum->sse_nregs <= 0)
6165 {
6166 cum->sse_nregs = 0;
6167 cum->sse_regno = 0;
6168 }
6169 }
6170 break;
6171
6172 case V8QImode:
6173 case V4HImode:
6174 case V2SImode:
6175 case V2SFmode:
6176 case V1TImode:
6177 case V1DImode:
6178 if (!type || !AGGREGATE_TYPE_P (type))
6179 {
6180 cum->mmx_words += words;
6181 cum->mmx_nregs -= 1;
6182 cum->mmx_regno += 1;
6183 if (cum->mmx_nregs <= 0)
6184 {
6185 cum->mmx_nregs = 0;
6186 cum->mmx_regno = 0;
6187 }
6188 }
6189 break;
6190 }
6191 }
6192
6193 static void
6194 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6195 const_tree type, HOST_WIDE_INT words, bool named)
6196 {
6197 int int_nregs, sse_nregs;
6198
6199 /* Unnamed 256bit vector mode parameters are passed on stack. */
6200 if (!named && VALID_AVX256_REG_MODE (mode))
6201 return;
6202
6203 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6204 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6205 {
6206 cum->nregs -= int_nregs;
6207 cum->sse_nregs -= sse_nregs;
6208 cum->regno += int_nregs;
6209 cum->sse_regno += sse_nregs;
6210 }
6211 else
6212 {
6213 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6214 cum->words = (cum->words + align - 1) & ~(align - 1);
6215 cum->words += words;
6216 }
6217 }
6218
6219 static void
6220 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6221 HOST_WIDE_INT words)
6222 {
6223 /* Anything of another size should have been passed indirectly. */
6224 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6225
6226 cum->words += words;
6227 if (cum->nregs > 0)
6228 {
6229 cum->nregs -= 1;
6230 cum->regno += 1;
6231 }
6232 }
6233
6234 /* Update the data in CUM to advance over an argument of mode MODE and
6235 data type TYPE. (TYPE is null for libcalls where that information
6236 may not be available.) */
6237
6238 static void
6239 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6240 const_tree type, bool named)
6241 {
6242 HOST_WIDE_INT bytes, words;
6243
6244 if (mode == BLKmode)
6245 bytes = int_size_in_bytes (type);
6246 else
6247 bytes = GET_MODE_SIZE (mode);
6248 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6249
6250 if (type)
6251 mode = type_natural_mode (type, NULL);
6252
6253 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6254 function_arg_advance_ms_64 (cum, bytes, words);
6255 else if (TARGET_64BIT)
6256 function_arg_advance_64 (cum, mode, type, words, named);
6257 else
6258 function_arg_advance_32 (cum, mode, type, bytes, words);
6259 }
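
/* Sketch of the cumulative bookkeeping (assumed example, 64-bit SysV):
   advancing over

     void f (int a, double b, int c);

   consumes one integer slot for A, one SSE slot for B and a second
   integer slot for C, leaving cum->regno == 2 and cum->sse_regno == 1.  */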
6260
6261 /* Define where to put the arguments to a function.
6262 Value is zero to push the argument on the stack,
6263 or a hard register in which to store the argument.
6264
6265 MODE is the argument's machine mode.
6266 TYPE is the data type of the argument (as a tree).
6267 This is null for libcalls where that information may
6268 not be available.
6269 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6270 the preceding args and about the function being called.
6271 NAMED is nonzero if this argument is a named parameter
6272 (otherwise it is an extra parameter matching an ellipsis). */
6273
6274 static rtx
6275 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6276 enum machine_mode orig_mode, const_tree type,
6277 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6278 {
6279 static bool warnedsse, warnedmmx;
6280
6281 /* Avoid the AL settings for the Unix64 ABI. */
6282 if (mode == VOIDmode)
6283 return constm1_rtx;
6284
6285 switch (mode)
6286 {
6287 default:
6288 break;
6289
6290 case BLKmode:
6291 if (bytes < 0)
6292 break;
6293 /* FALLTHRU */
6294 case DImode:
6295 case SImode:
6296 case HImode:
6297 case QImode:
6298 if (words <= cum->nregs)
6299 {
6300 int regno = cum->regno;
6301
6302 /* Fastcall allocates the first two DWORD (SImode) or
6303 smaller arguments to ECX and EDX if they aren't
6304 aggregate types. */
6305 if (cum->fastcall)
6306 {
6307 if (mode == BLKmode
6308 || mode == DImode
6309 || (type && AGGREGATE_TYPE_P (type)))
6310 break;
6311
6312 /* ECX, not EAX, is the first allocated register. */
6313 if (regno == AX_REG)
6314 regno = CX_REG;
6315 }
6316 return gen_rtx_REG (mode, regno);
6317 }
6318 break;
6319
6320 case DFmode:
6321 if (cum->float_in_sse < 2)
6322 break;
6323 case SFmode:
6324 if (cum->float_in_sse < 1)
6325 break;
6326 /* FALLTHRU */
6327 case TImode:
6328 /* In 32bit, we pass TImode in xmm registers. */
6329 case V16QImode:
6330 case V8HImode:
6331 case V4SImode:
6332 case V2DImode:
6333 case V4SFmode:
6334 case V2DFmode:
6335 if (!type || !AGGREGATE_TYPE_P (type))
6336 {
6337 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6338 {
6339 warnedsse = true;
6340 warning (0, "SSE vector argument without SSE enabled "
6341 "changes the ABI");
6342 }
6343 if (cum->sse_nregs)
6344 return gen_reg_or_parallel (mode, orig_mode,
6345 cum->sse_regno + FIRST_SSE_REG);
6346 }
6347 break;
6348
6349 case OImode:
6350 /* OImode shouldn't be used directly. */
6351 gcc_unreachable ();
6352
6353 case V8SFmode:
6354 case V8SImode:
6355 case V32QImode:
6356 case V16HImode:
6357 case V4DFmode:
6358 case V4DImode:
6359 if (!type || !AGGREGATE_TYPE_P (type))
6360 {
6361 if (cum->sse_nregs)
6362 return gen_reg_or_parallel (mode, orig_mode,
6363 cum->sse_regno + FIRST_SSE_REG);
6364 }
6365 break;
6366
6367 case V8QImode:
6368 case V4HImode:
6369 case V2SImode:
6370 case V2SFmode:
6371 case V1TImode:
6372 case V1DImode:
6373 if (!type || !AGGREGATE_TYPE_P (type))
6374 {
6375 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6376 {
6377 warnedmmx = true;
6378 warning (0, "MMX vector argument without MMX enabled "
6379 "changes the ABI");
6380 }
6381 if (cum->mmx_nregs)
6382 return gen_reg_or_parallel (mode, orig_mode,
6383 cum->mmx_regno + FIRST_MMX_REG);
6384 }
6385 break;
6386 }
6387
6388 return NULL_RTX;
6389 }
6390
6391 static rtx
6392 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6393 enum machine_mode orig_mode, const_tree type, bool named)
6394 {
6395 /* Handle a hidden AL argument containing number of registers
6396 for varargs x86-64 functions. */
6397 if (mode == VOIDmode)
6398 return GEN_INT (cum->maybe_vaarg
6399 ? (cum->sse_nregs < 0
6400 ? X86_64_SSE_REGPARM_MAX
6401 : cum->sse_regno)
6402 : -1);
6403
6404 switch (mode)
6405 {
6406 default:
6407 break;
6408
6409 case V8SFmode:
6410 case V8SImode:
6411 case V32QImode:
6412 case V16HImode:
6413 case V4DFmode:
6414 case V4DImode:
6415 /* Unnamed 256bit vector mode parameters are passed on stack. */
6416 if (!named)
6417 return NULL;
6418 break;
6419 }
6420
6421 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6422 cum->sse_nregs,
6423 &x86_64_int_parameter_registers [cum->regno],
6424 cum->sse_regno);
6425 }
6426
6427 static rtx
6428 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6429 enum machine_mode orig_mode, bool named,
6430 HOST_WIDE_INT bytes)
6431 {
6432 unsigned int regno;
6433
6434 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6435 We use the value -2 to specify that the current function call is MS ABI. */
6436 if (mode == VOIDmode)
6437 return GEN_INT (-2);
6438
6439 /* If we've run out of registers, it goes on the stack. */
6440 if (cum->nregs == 0)
6441 return NULL_RTX;
6442
6443 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6444
6445 /* Only floating point modes are passed in anything but integer regs. */
6446 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6447 {
6448 if (named)
6449 regno = cum->regno + FIRST_SSE_REG;
6450 else
6451 {
6452 rtx t1, t2;
6453
6454 /* Unnamed floating parameters are passed in both the
6455 SSE and integer registers. */
6456 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6457 t2 = gen_rtx_REG (mode, regno);
6458 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6459 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6460 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6461 }
6462 }
6463 /* Handle aggregate types passed in registers. */
6464 if (orig_mode == BLKmode)
6465 {
6466 if (bytes > 0 && bytes <= 8)
6467 mode = (bytes > 4 ? DImode : SImode);
6468 if (mode == BLKmode)
6469 mode = DImode;
6470 }
6471
6472 return gen_reg_or_parallel (mode, orig_mode, regno);
6473 }
6474
6475 /* Return where to put the arguments to a function.
6476 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6477
6478 MODE is the argument's machine mode. TYPE is the data type of the
6479 argument. It is null for libcalls where that information may not be
6480 available. CUM gives information about the preceding args and about
6481 the function being called. NAMED is nonzero if this argument is a
6482 named parameter (otherwise it is an extra parameter matching an
6483 ellipsis). */
6484
6485 static rtx
6486 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6487 const_tree type, bool named)
6488 {
6489 enum machine_mode mode = omode;
6490 HOST_WIDE_INT bytes, words;
6491
6492 if (mode == BLKmode)
6493 bytes = int_size_in_bytes (type);
6494 else
6495 bytes = GET_MODE_SIZE (mode);
6496 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6497
6498 /* To simplify the code below, represent vector types with a vector mode
6499 even if MMX/SSE are not active. */
6500 if (type && TREE_CODE (type) == VECTOR_TYPE)
6501 mode = type_natural_mode (type, cum);
6502
6503 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6504 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6505 else if (TARGET_64BIT)
6506 return function_arg_64 (cum, mode, omode, type, named);
6507 else
6508 return function_arg_32 (cum, mode, omode, type, bytes, words);
6509 }
6510
6511 /* A C expression that indicates when an argument must be passed by
6512 reference. If nonzero for an argument, a copy of that argument is
6513 made in memory and a pointer to the argument is passed instead of
6514 the argument itself. The pointer is passed in whatever way is
6515 appropriate for passing a pointer to that type. */
6516
6517 static bool
6518 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6519 enum machine_mode mode ATTRIBUTE_UNUSED,
6520 const_tree type, bool named ATTRIBUTE_UNUSED)
6521 {
6522 /* See Windows x64 Software Convention. */
6523 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6524 {
6525 int msize = (int) GET_MODE_SIZE (mode);
6526 if (type)
6527 {
6528 /* Arrays are passed by reference. */
6529 if (TREE_CODE (type) == ARRAY_TYPE)
6530 return true;
6531
6532 if (AGGREGATE_TYPE_P (type))
6533 {
6534 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6535 are passed by reference. */
6536 msize = int_size_in_bytes (type);
6537 }
6538 }
6539
6540 /* __m128 is passed by reference. */
6541 switch (msize) {
6542 case 1: case 2: case 4: case 8:
6543 break;
6544 default:
6545 return true;
6546 }
6547 }
6548 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6549 return 1;
6550
6551 return 0;
6552 }
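
/* Sketch (a consequence of the Windows x64 rules checked above): a
   16-byte aggregate such as

     struct pair { long long a, b; };

   is not of size 1, 2, 4 or 8, so under MS_ABI the caller makes a copy
   and passes its address in the argument register instead.  */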
6553
6554 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6555 ABI. */
6556 static bool
6557 contains_aligned_value_p (const_tree type)
6558 {
6559 enum machine_mode mode = TYPE_MODE (type);
6560 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6561 || mode == TDmode
6562 || mode == TFmode
6563 || mode == TCmode)
6564 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6565 return true;
6566 if (TYPE_ALIGN (type) < 128)
6567 return false;
6568
6569 if (AGGREGATE_TYPE_P (type))
6570 {
6571 /* Walk the aggregates recursively. */
6572 switch (TREE_CODE (type))
6573 {
6574 case RECORD_TYPE:
6575 case UNION_TYPE:
6576 case QUAL_UNION_TYPE:
6577 {
6578 tree field;
6579
6580 /* Walk all the structure fields. */
6581 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6582 {
6583 if (TREE_CODE (field) == FIELD_DECL
6584 && contains_aligned_value_p (TREE_TYPE (field)))
6585 return true;
6586 }
6587 break;
6588 }
6589
6590 case ARRAY_TYPE:
6591 /* Just for use if some languages pass arrays by value. */
6592 if (contains_aligned_value_p (TREE_TYPE (type)))
6593 return true;
6594 break;
6595
6596 default:
6597 gcc_unreachable ();
6598 }
6599 }
6600 return false;
6601 }
6602
6603 /* Gives the alignment boundary, in bits, of an argument with the
6604 specified mode and type. */
6605
6606 int
6607 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6608 {
6609 int align;
6610 if (type)
6611 {
6612 /* The main variant type is used for the call, so convert TYPE to
6613 its main variant. */
6614 type = TYPE_MAIN_VARIANT (type);
6615 align = TYPE_ALIGN (type);
6616 }
6617 else
6618 align = GET_MODE_ALIGNMENT (mode);
6619 if (align < PARM_BOUNDARY)
6620 align = PARM_BOUNDARY;
6621 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6622 natural boundaries. */
6623 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6624 {
6625 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6626 make an exception for SSE modes since these require 128bit
6627 alignment.
6628
6629 The handling here differs from field_alignment. ICC aligns MMX
6630 arguments to 4 byte boundaries, while structure fields are aligned
6631 to 8 byte boundaries. */
6632 if (!type)
6633 {
6634 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6635 align = PARM_BOUNDARY;
6636 }
6637 else
6638 {
6639 if (!contains_aligned_value_p (type))
6640 align = PARM_BOUNDARY;
6641 }
6642 }
6643 if (align > BIGGEST_ALIGNMENT)
6644 align = BIGGEST_ALIGNMENT;
6645 return align;
6646 }
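
/* Sketch (assumed -m32 examples): an __m128 argument reports a 128-bit
   boundary here, while a plain double stays at the 32-bit PARM_BOUNDARY
   even though its natural alignment is 64 bits, matching the i386 ABI
   rule mentioned above.  */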
6647
6648 /* Return true if N is a possible register number of function value. */
6649
6650 static bool
6651 ix86_function_value_regno_p (const unsigned int regno)
6652 {
6653 switch (regno)
6654 {
6655 case 0:
6656 return true;
6657
6658 case FIRST_FLOAT_REG:
6659 /* TODO: The function should depend on current function ABI but
6660 builtins.c would need updating then. Therefore we use the
6661 default ABI. */
6662 if (TARGET_64BIT && ix86_abi == MS_ABI)
6663 return false;
6664 return TARGET_FLOAT_RETURNS_IN_80387;
6665
6666 case FIRST_SSE_REG:
6667 return TARGET_SSE;
6668
6669 case FIRST_MMX_REG:
6670 if (TARGET_MACHO || TARGET_64BIT)
6671 return false;
6672 return TARGET_MMX;
6673 }
6674
6675 return false;
6676 }
6677
6678 /* Define how to find the value returned by a function.
6679 VALTYPE is the data type of the value (as a tree).
6680 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6681 otherwise, FUNC is 0. */
6682
6683 static rtx
6684 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6685 const_tree fntype, const_tree fn)
6686 {
6687 unsigned int regno;
6688
6689 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6690 we normally prevent this case when mmx is not available. However
6691 some ABIs may require the result to be returned like DImode. */
6692 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6693 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6694
6695 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6696 we prevent this case when sse is not available. However some ABIs
6697 may require the result to be returned like integer TImode. */
6698 else if (mode == TImode
6699 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6700 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6701
6702 /* 32-byte vector modes in %ymm0. */
6703 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6704 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6705
6706 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6707 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6708 regno = FIRST_FLOAT_REG;
6709 else
6710 /* Most things go in %eax. */
6711 regno = AX_REG;
6712
6713 /* Override FP return register with %xmm0 for local functions when
6714 SSE math is enabled or for functions with sseregparm attribute. */
6715 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6716 {
6717 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6718 if ((sse_level >= 1 && mode == SFmode)
6719 || (sse_level == 2 && mode == DFmode))
6720 regno = FIRST_SSE_REG;
6721 }
6722
6723 /* OImode shouldn't be used directly. */
6724 gcc_assert (mode != OImode);
6725
6726 return gen_rtx_REG (orig_mode, regno);
6727 }
6728
6729 static rtx
6730 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6731 const_tree valtype)
6732 {
6733 rtx ret;
6734
6735 /* Handle libcalls, which don't provide a type node. */
6736 if (valtype == NULL)
6737 {
6738 switch (mode)
6739 {
6740 case SFmode:
6741 case SCmode:
6742 case DFmode:
6743 case DCmode:
6744 case TFmode:
6745 case SDmode:
6746 case DDmode:
6747 case TDmode:
6748 return gen_rtx_REG (mode, FIRST_SSE_REG);
6749 case XFmode:
6750 case XCmode:
6751 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6752 case TCmode:
6753 return NULL;
6754 default:
6755 return gen_rtx_REG (mode, AX_REG);
6756 }
6757 }
6758
6759 ret = construct_container (mode, orig_mode, valtype, 1,
6760 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6761 x86_64_int_return_registers, 0);
6762
6763 /* For zero sized structures, construct_container returns NULL, but we
6764 need to keep rest of compiler happy by returning meaningful value. */
6765 if (!ret)
6766 ret = gen_rtx_REG (orig_mode, AX_REG);
6767
6768 return ret;
6769 }
6770
6771 static rtx
6772 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6773 {
6774 unsigned int regno = AX_REG;
6775
6776 if (TARGET_SSE)
6777 {
6778 switch (GET_MODE_SIZE (mode))
6779 {
6780 case 16:
6781 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6782 && !COMPLEX_MODE_P (mode))
6783 regno = FIRST_SSE_REG;
6784 break;
6785 case 8:
6786 case 4:
6787 if (mode == SFmode || mode == DFmode)
6788 regno = FIRST_SSE_REG;
6789 break;
6790 default:
6791 break;
6792 }
6793 }
6794 return gen_rtx_REG (orig_mode, regno);
6795 }
6796
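/* Common worker for ix86_function_value and ix86_libcall_value: dispatch
   to the 32-bit, 64-bit SysV or 64-bit MS variant above according to the
   target and the function's ABI.  */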
6797 static rtx
6798 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6799 enum machine_mode orig_mode, enum machine_mode mode)
6800 {
6801 const_tree fn, fntype;
6802
6803 fn = NULL_TREE;
6804 if (fntype_or_decl && DECL_P (fntype_or_decl))
6805 fn = fntype_or_decl;
6806 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6807
6808 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6809 return function_value_ms_64 (orig_mode, mode);
6810 else if (TARGET_64BIT)
6811 return function_value_64 (orig_mode, mode, valtype);
6812 else
6813 return function_value_32 (orig_mode, mode, fntype, fn);
6814 }
6815
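/* Return an RTX representing the place where a function returns a value
   of type VALTYPE.  */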
6816 static rtx
6817 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6818 bool outgoing ATTRIBUTE_UNUSED)
6819 {
6820 enum machine_mode mode, orig_mode;
6821
6822 orig_mode = TYPE_MODE (valtype);
6823 mode = type_natural_mode (valtype, NULL);
6824 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6825 }
6826
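/* Return an RTX representing the place where a libcall returns a value
   of mode MODE.  */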
6827 rtx
6828 ix86_libcall_value (enum machine_mode mode)
6829 {
6830 return ix86_function_value_1 (NULL, NULL, mode, mode);
6831 }
6832
6833 /* Return true iff type is returned in memory. */
6834
6835 static bool ATTRIBUTE_UNUSED
6836 return_in_memory_32 (const_tree type, enum machine_mode mode)
6837 {
6838 HOST_WIDE_INT size;
6839
6840 if (mode == BLKmode)
6841 return true;
6842
6843 size = int_size_in_bytes (type);
6844
6845 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6846 return false;
6847
6848 if (VECTOR_MODE_P (mode) || mode == TImode)
6849 {
6850 /* User-created vectors small enough to fit in EAX. */
6851 if (size < 8)
6852 return false;
6853
6854 /* MMX/3dNow values are returned in MM0,
6855 except when it doesn't exist or the ABI prescribes otherwise. */
6856 if (size == 8)
6857 return !TARGET_MMX || TARGET_VECT8_RETURNS;
6858
6859 /* SSE values are returned in XMM0, except when it doesn't exist. */
6860 if (size == 16)
6861 return !TARGET_SSE;
6862
6863 /* AVX values are returned in YMM0, except when it doesn't exist. */
6864 if (size == 32)
6865 return !TARGET_AVX;
6866 }
6867
6868 if (mode == XFmode)
6869 return false;
6870
6871 if (size > 12)
6872 return true;
6873
6874 /* OImode shouldn't be used directly. */
6875 gcc_assert (mode != OImode);
6876
6877 return false;
6878 }
6879
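/* Likewise, but for the 64-bit SysV ABI: the value goes in memory iff it
   cannot be returned entirely in registers.  */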
6880 static bool ATTRIBUTE_UNUSED
6881 return_in_memory_64 (const_tree type, enum machine_mode mode)
6882 {
6883 int needed_intregs, needed_sseregs;
6884 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6885 }
6886
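/* Likewise, but for the 64-bit MS ABI.  */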
6887 static bool ATTRIBUTE_UNUSED
6888 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6889 {
6890 HOST_WIDE_INT size = int_size_in_bytes (type);
6891
6892 /* __m128 is returned in xmm0. */
6893 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6894 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6895 return false;
6896
6897 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
6898 return size != 1 && size != 2 && size != 4 && size != 8;
6899 }
6900
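/* Top-level predicate: dispatch to the ABI-specific helper above, unless
   the subtarget provides SUBTARGET_RETURN_IN_MEMORY.  */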
6901 static bool
6902 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6903 {
6904 #ifdef SUBTARGET_RETURN_IN_MEMORY
6905 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6906 #else
6907 const enum machine_mode mode = type_natural_mode (type, NULL);
6908
6909 if (TARGET_64BIT)
6910 {
6911 if (ix86_function_type_abi (fntype) == MS_ABI)
6912 return return_in_memory_ms_64 (type, mode);
6913 else
6914 return return_in_memory_64 (type, mode);
6915 }
6916 else
6917 return return_in_memory_32 (type, mode);
6918 #endif
6919 }
6920
6921 /* When returning SSE vector types, we have a choice of either
6922 (1) being ABI incompatible with a -march switch, or
6923 (2) generating an error.
6924 Given no good solution, I think the safest thing is one warning.
6925 The user won't be able to use -Werror, but....
6926
6927 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6928 called in response to actually generating a caller or callee that
6929 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6930 via aggregate_value_p for general type probing from tree-ssa. */
6931
6932 static rtx
6933 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6934 {
6935 static bool warnedsse, warnedmmx;
6936
6937 if (!TARGET_64BIT && type)
6938 {
6939 /* Look at the return type of the function, not the function type. */
6940 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6941
6942 if (!TARGET_SSE && !warnedsse)
6943 {
6944 if (mode == TImode
6945 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6946 {
6947 warnedsse = true;
6948 warning (0, "SSE vector return without SSE enabled "
6949 "changes the ABI");
6950 }
6951 }
6952
6953 if (!TARGET_MMX && !warnedmmx)
6954 {
6955 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6956 {
6957 warnedmmx = true;
6958 warning (0, "MMX vector return without MMX enabled "
6959 "changes the ABI");
6960 }
6961 }
6962 }
6963
6964 return NULL;
6965 }
6966
6967 \f
6968 /* Create the va_list data type. */
6969
6970 /* Returns the calling-convention-specific va_list data type.
6971 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6972
6973 static tree
6974 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6975 {
6976 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6977
6978 /* For i386 we use a plain pointer to the argument area. */
6979 if (!TARGET_64BIT || abi == MS_ABI)
6980 return build_pointer_type (char_type_node);
6981
6982 record = lang_hooks.types.make_type (RECORD_TYPE);
6983 type_decl = build_decl (BUILTINS_LOCATION,
6984 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6985
6986 f_gpr = build_decl (BUILTINS_LOCATION,
6987 FIELD_DECL, get_identifier ("gp_offset"),
6988 unsigned_type_node);
6989 f_fpr = build_decl (BUILTINS_LOCATION,
6990 FIELD_DECL, get_identifier ("fp_offset"),
6991 unsigned_type_node);
6992 f_ovf = build_decl (BUILTINS_LOCATION,
6993 FIELD_DECL, get_identifier ("overflow_arg_area"),
6994 ptr_type_node);
6995 f_sav = build_decl (BUILTINS_LOCATION,
6996 FIELD_DECL, get_identifier ("reg_save_area"),
6997 ptr_type_node);
6998
6999 va_list_gpr_counter_field = f_gpr;
7000 va_list_fpr_counter_field = f_fpr;
7001
7002 DECL_FIELD_CONTEXT (f_gpr) = record;
7003 DECL_FIELD_CONTEXT (f_fpr) = record;
7004 DECL_FIELD_CONTEXT (f_ovf) = record;
7005 DECL_FIELD_CONTEXT (f_sav) = record;
7006
7007 TREE_CHAIN (record) = type_decl;
7008 TYPE_NAME (record) = type_decl;
7009 TYPE_FIELDS (record) = f_gpr;
7010 DECL_CHAIN (f_gpr) = f_fpr;
7011 DECL_CHAIN (f_fpr) = f_ovf;
7012 DECL_CHAIN (f_ovf) = f_sav;
7013
7014 layout_type (record);
7015
7016 /* The correct type is an array type of one element. */
7017 return build_array_type (record, build_index_type (size_zero_node));
7018 }
7019
7020 /* Set up the builtin va_list data type, and for 64-bit targets the
7021 additional calling-convention-specific va_list data types. */
7022
7023 static tree
7024 ix86_build_builtin_va_list (void)
7025 {
7026 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7027
7028 /* Initialize ABI-specific va_list builtin types. */
7029 if (TARGET_64BIT)
7030 {
7031 tree t;
7032 if (ix86_abi == MS_ABI)
7033 {
7034 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7035 if (TREE_CODE (t) != RECORD_TYPE)
7036 t = build_variant_type_copy (t);
7037 sysv_va_list_type_node = t;
7038 }
7039 else
7040 {
7041 t = ret;
7042 if (TREE_CODE (t) != RECORD_TYPE)
7043 t = build_variant_type_copy (t);
7044 sysv_va_list_type_node = t;
7045 }
7046 if (ix86_abi != MS_ABI)
7047 {
7048 t = ix86_build_builtin_va_list_abi (MS_ABI);
7049 if (TREE_CODE (t) != RECORD_TYPE)
7050 t = build_variant_type_copy (t);
7051 ms_va_list_type_node = t;
7052 }
7053 else
7054 {
7055 t = ret;
7056 if (TREE_CODE (t) != RECORD_TYPE)
7057 t = build_variant_type_copy (t);
7058 ms_va_list_type_node = t;
7059 }
7060 }
7061
7062 return ret;
7063 }
7064
7065 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7066
7067 static void
7068 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7069 {
7070 rtx save_area, mem;
7071 alias_set_type set;
7072 int i, max;
7073
7074 /* GPR size of varargs save area. */
7075 if (cfun->va_list_gpr_size)
7076 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7077 else
7078 ix86_varargs_gpr_size = 0;
7079
7080 /* FPR size of varargs save area. We don't need it if we don't pass
7081 anything in SSE registers. */
7082 if (TARGET_SSE && cfun->va_list_fpr_size)
7083 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7084 else
7085 ix86_varargs_fpr_size = 0;
7086
7087 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7088 return;
7089
7090 save_area = frame_pointer_rtx;
7091 set = get_varargs_alias_set ();
7092
7093 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7094 if (max > X86_64_REGPARM_MAX)
7095 max = X86_64_REGPARM_MAX;
7096
7097 for (i = cum->regno; i < max; i++)
7098 {
7099 mem = gen_rtx_MEM (Pmode,
7100 plus_constant (save_area, i * UNITS_PER_WORD));
7101 MEM_NOTRAP_P (mem) = 1;
7102 set_mem_alias_set (mem, set);
7103 emit_move_insn (mem, gen_rtx_REG (Pmode,
7104 x86_64_int_parameter_registers[i]));
7105 }
7106
7107 if (ix86_varargs_fpr_size)
7108 {
7109 enum machine_mode smode;
7110 rtx label, test;
7111
7112 /* Now emit code to save SSE registers. The AX parameter contains the
7113 number of SSE parameter registers used to call this function, though
7114 all we actually check here is the zero/non-zero status. */
7115
7116 label = gen_label_rtx ();
7117 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7118 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7119 label));
7120
7121 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7122 we used movdqa (i.e. TImode) instead? Perhaps even better would
7123 be if we could determine the real mode of the data, via a hook
7124 into pass_stdarg. Ignore all that for now. */
7125 smode = V4SFmode;
7126 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7127 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7128
7129 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7130 if (max > X86_64_SSE_REGPARM_MAX)
7131 max = X86_64_SSE_REGPARM_MAX;
7132
7133 for (i = cum->sse_regno; i < max; ++i)
7134 {
7135 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7136 mem = gen_rtx_MEM (smode, mem);
7137 MEM_NOTRAP_P (mem) = 1;
7138 set_mem_alias_set (mem, set);
7139 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7140
7141 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7142 }
7143
7144 emit_label (label);
7145 }
7146 }
7147
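/* Likewise, but for the 64-bit MS ABI, where the remaining integer
   parameter registers are simply stored into their stack home slots.  */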
7148 static void
7149 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7150 {
7151 alias_set_type set = get_varargs_alias_set ();
7152 int i;
7153
7154 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7155 {
7156 rtx reg, mem;
7157
7158 mem = gen_rtx_MEM (Pmode,
7159 plus_constant (virtual_incoming_args_rtx,
7160 i * UNITS_PER_WORD));
7161 MEM_NOTRAP_P (mem) = 1;
7162 set_mem_alias_set (mem, set);
7163
7164 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7165 emit_move_insn (mem, reg);
7166 }
7167 }
7168
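/* Set up the incoming varargs save area: skip the last named argument
   for stdarg functions and hand off to the SysV or MS ABI worker above.  */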
7169 static void
7170 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7171 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7172 int no_rtl)
7173 {
7174 CUMULATIVE_ARGS next_cum;
7175 tree fntype;
7176
7177 /* This argument doesn't appear to be used anymore, which is good,
7178 because the old code here didn't suppress rtl generation. */
7179 gcc_assert (!no_rtl);
7180
7181 if (!TARGET_64BIT)
7182 return;
7183
7184 fntype = TREE_TYPE (current_function_decl);
7185
7186 /* For varargs, we do not want to skip the dummy va_dcl argument.
7187 For stdargs, we do want to skip the last named argument. */
7188 next_cum = *cum;
7189 if (stdarg_p (fntype))
7190 ix86_function_arg_advance (&next_cum, mode, type, true);
7191
7192 if (cum->call_abi == MS_ABI)
7193 setup_incoming_varargs_ms_64 (&next_cum);
7194 else
7195 setup_incoming_varargs_64 (&next_cum);
7196 }
7197
7198 /* Check whether TYPE is the char * kind of va_list. */
7199
7200 static bool
7201 is_va_list_char_pointer (tree type)
7202 {
7203 tree canonic;
7204
7205 /* For 32-bit it is always true. */
7206 if (!TARGET_64BIT)
7207 return true;
7208 canonic = ix86_canonical_va_list_type (type);
7209 return (canonic == ms_va_list_type_node
7210 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7211 }
7212
7213 /* Implement va_start. */
7214
7215 static void
7216 ix86_va_start (tree valist, rtx nextarg)
7217 {
7218 HOST_WIDE_INT words, n_gpr, n_fpr;
7219 tree f_gpr, f_fpr, f_ovf, f_sav;
7220 tree gpr, fpr, ovf, sav, t;
7221 tree type;
7222
7223 rtx ovf_rtx;
7224
7225 if (flag_split_stack
7226 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7227 {
7228 unsigned int scratch_regno;
7229
7230 /* When we are splitting the stack, we can't refer to the stack
7231 arguments using internal_arg_pointer, because they may be on
7232 the old stack. The split stack prologue will arrange to
7233 leave a pointer to the old stack arguments in a scratch
7234 register, which we here copy to a pseudo-register. The split
7235 stack prologue can't set the pseudo-register directly because
7236 it (the prologue) runs before any registers have been saved. */
7237
7238 scratch_regno = split_stack_prologue_scratch_regno ();
7239 if (scratch_regno != INVALID_REGNUM)
7240 {
7241 rtx reg, seq;
7242
7243 reg = gen_reg_rtx (Pmode);
7244 cfun->machine->split_stack_varargs_pointer = reg;
7245
7246 start_sequence ();
7247 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7248 seq = get_insns ();
7249 end_sequence ();
7250
7251 push_topmost_sequence ();
7252 emit_insn_after (seq, entry_of_function ());
7253 pop_topmost_sequence ();
7254 }
7255 }
7256
7257 /* Only 64-bit targets need something special. */
7258 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7259 {
7260 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7261 std_expand_builtin_va_start (valist, nextarg);
7262 else
7263 {
7264 rtx va_r, next;
7265
7266 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7267 next = expand_binop (ptr_mode, add_optab,
7268 cfun->machine->split_stack_varargs_pointer,
7269 crtl->args.arg_offset_rtx,
7270 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7271 convert_move (va_r, next, 0);
7272 }
7273 return;
7274 }
7275
7276 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7277 f_fpr = DECL_CHAIN (f_gpr);
7278 f_ovf = DECL_CHAIN (f_fpr);
7279 f_sav = DECL_CHAIN (f_ovf);
7280
7281 valist = build_simple_mem_ref (valist);
7282 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7283 /* The following should be folded into the MEM_REF offset. */
7284 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7285 f_gpr, NULL_TREE);
7286 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7287 f_fpr, NULL_TREE);
7288 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7289 f_ovf, NULL_TREE);
7290 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7291 f_sav, NULL_TREE);
7292
7293 /* Count number of gp and fp argument registers used. */
7294 words = crtl->args.info.words;
7295 n_gpr = crtl->args.info.regno;
7296 n_fpr = crtl->args.info.sse_regno;
7297
7298 if (cfun->va_list_gpr_size)
7299 {
7300 type = TREE_TYPE (gpr);
7301 t = build2 (MODIFY_EXPR, type,
7302 gpr, build_int_cst (type, n_gpr * 8));
7303 TREE_SIDE_EFFECTS (t) = 1;
7304 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7305 }
7306
7307 if (TARGET_SSE && cfun->va_list_fpr_size)
7308 {
7309 type = TREE_TYPE (fpr);
7310 t = build2 (MODIFY_EXPR, type, fpr,
7311 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7312 TREE_SIDE_EFFECTS (t) = 1;
7313 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7314 }
7315
7316 /* Find the overflow area. */
7317 type = TREE_TYPE (ovf);
7318 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7319 ovf_rtx = crtl->args.internal_arg_pointer;
7320 else
7321 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7322 t = make_tree (type, ovf_rtx);
7323 if (words != 0)
7324 t = build2 (POINTER_PLUS_EXPR, type, t,
7325 size_int (words * UNITS_PER_WORD));
7326 t = build2 (MODIFY_EXPR, type, ovf, t);
7327 TREE_SIDE_EFFECTS (t) = 1;
7328 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7329
7330 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7331 {
7332 /* Find the register save area.
7333 The function prologue saves it right above the stack frame. */
7334 type = TREE_TYPE (sav);
7335 t = make_tree (type, frame_pointer_rtx);
7336 if (!ix86_varargs_gpr_size)
7337 t = build2 (POINTER_PLUS_EXPR, type, t,
7338 size_int (-8 * X86_64_REGPARM_MAX));
7339 t = build2 (MODIFY_EXPR, type, sav, t);
7340 TREE_SIDE_EFFECTS (t) = 1;
7341 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7342 }
7343 }
7344
7345 /* Implement va_arg. */
7346
7347 static tree
7348 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7349 gimple_seq *post_p)
7350 {
7351 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7352 tree f_gpr, f_fpr, f_ovf, f_sav;
7353 tree gpr, fpr, ovf, sav, t;
7354 int size, rsize;
7355 tree lab_false, lab_over = NULL_TREE;
7356 tree addr, t2;
7357 rtx container;
7358 int indirect_p = 0;
7359 tree ptrtype;
7360 enum machine_mode nat_mode;
7361 unsigned int arg_boundary;
7362
7363 /* Only 64-bit targets need something special. */
7364 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7365 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7366
7367 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7368 f_fpr = DECL_CHAIN (f_gpr);
7369 f_ovf = DECL_CHAIN (f_fpr);
7370 f_sav = DECL_CHAIN (f_ovf);
7371
7372 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7373 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7374 valist = build_va_arg_indirect_ref (valist);
7375 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7376 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7377 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7378
7379 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7380 if (indirect_p)
7381 type = build_pointer_type (type);
7382 size = int_size_in_bytes (type);
7383 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7384
7385 nat_mode = type_natural_mode (type, NULL);
7386 switch (nat_mode)
7387 {
7388 case V8SFmode:
7389 case V8SImode:
7390 case V32QImode:
7391 case V16HImode:
7392 case V4DFmode:
7393 case V4DImode:
7394 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
7395 if (ix86_cfun_abi () == SYSV_ABI)
7396 {
7397 container = NULL;
7398 break;
7399 }
7400
7401 default:
7402 container = construct_container (nat_mode, TYPE_MODE (type),
7403 type, 0, X86_64_REGPARM_MAX,
7404 X86_64_SSE_REGPARM_MAX, intreg,
7405 0);
7406 break;
7407 }
7408
7409 /* Pull the value out of the saved registers. */
7410
7411 addr = create_tmp_var (ptr_type_node, "addr");
7412
7413 if (container)
7414 {
7415 int needed_intregs, needed_sseregs;
7416 bool need_temp;
7417 tree int_addr, sse_addr;
7418
7419 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7420 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7421
7422 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7423
7424 need_temp = (!REG_P (container)
7425 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7426 || TYPE_ALIGN (type) > 128));
7427
7428 /* If we are passing a structure, verify that it is a consecutive block
7429 in the register save area. If not, we need to do moves. */
7430 if (!need_temp && !REG_P (container))
7431 {
7432 /* Verify that all registers are strictly consecutive. */
7433 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7434 {
7435 int i;
7436
7437 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7438 {
7439 rtx slot = XVECEXP (container, 0, i);
7440 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7441 || INTVAL (XEXP (slot, 1)) != i * 16)
7442 need_temp = 1;
7443 }
7444 }
7445 else
7446 {
7447 int i;
7448
7449 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7450 {
7451 rtx slot = XVECEXP (container, 0, i);
7452 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7453 || INTVAL (XEXP (slot, 1)) != i * 8)
7454 need_temp = 1;
7455 }
7456 }
7457 }
7458 if (!need_temp)
7459 {
7460 int_addr = addr;
7461 sse_addr = addr;
7462 }
7463 else
7464 {
7465 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7466 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7467 }
7468
7469 /* First ensure that we fit completely in registers. */
7470 if (needed_intregs)
7471 {
7472 t = build_int_cst (TREE_TYPE (gpr),
7473 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7474 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7475 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7476 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7477 gimplify_and_add (t, pre_p);
7478 }
7479 if (needed_sseregs)
7480 {
7481 t = build_int_cst (TREE_TYPE (fpr),
7482 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7483 + X86_64_REGPARM_MAX * 8);
7484 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7485 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7486 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7487 gimplify_and_add (t, pre_p);
7488 }
7489
7490 /* Compute index to start of area used for integer regs. */
7491 if (needed_intregs)
7492 {
7493 /* int_addr = gpr + sav; */
7494 t = fold_convert (sizetype, gpr);
7495 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7496 gimplify_assign (int_addr, t, pre_p);
7497 }
7498 if (needed_sseregs)
7499 {
7500 /* sse_addr = fpr + sav; */
7501 t = fold_convert (sizetype, fpr);
7502 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7503 gimplify_assign (sse_addr, t, pre_p);
7504 }
7505 if (need_temp)
7506 {
7507 int i, prev_size = 0;
7508 tree temp = create_tmp_var (type, "va_arg_tmp");
7509
7510 /* addr = &temp; */
7511 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7512 gimplify_assign (addr, t, pre_p);
7513
7514 for (i = 0; i < XVECLEN (container, 0); i++)
7515 {
7516 rtx slot = XVECEXP (container, 0, i);
7517 rtx reg = XEXP (slot, 0);
7518 enum machine_mode mode = GET_MODE (reg);
7519 tree piece_type;
7520 tree addr_type;
7521 tree daddr_type;
7522 tree src_addr, src;
7523 int src_offset;
7524 tree dest_addr, dest;
7525 int cur_size = GET_MODE_SIZE (mode);
7526
7527 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7528 prev_size = INTVAL (XEXP (slot, 1));
7529 if (prev_size + cur_size > size)
7530 {
7531 cur_size = size - prev_size;
7532 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7533 if (mode == BLKmode)
7534 mode = QImode;
7535 }
7536 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7537 if (mode == GET_MODE (reg))
7538 addr_type = build_pointer_type (piece_type);
7539 else
7540 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7541 true);
7542 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7543 true);
7544
7545 if (SSE_REGNO_P (REGNO (reg)))
7546 {
7547 src_addr = sse_addr;
7548 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7549 }
7550 else
7551 {
7552 src_addr = int_addr;
7553 src_offset = REGNO (reg) * 8;
7554 }
7555 src_addr = fold_convert (addr_type, src_addr);
7556 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7557 size_int (src_offset));
7558
7559 dest_addr = fold_convert (daddr_type, addr);
7560 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7561 size_int (prev_size));
7562 if (cur_size == GET_MODE_SIZE (mode))
7563 {
7564 src = build_va_arg_indirect_ref (src_addr);
7565 dest = build_va_arg_indirect_ref (dest_addr);
7566
7567 gimplify_assign (dest, src, pre_p);
7568 }
7569 else
7570 {
7571 tree copy
7572 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7573 3, dest_addr, src_addr,
7574 size_int (cur_size));
7575 gimplify_and_add (copy, pre_p);
7576 }
7577 prev_size += cur_size;
7578 }
7579 }
7580
7581 if (needed_intregs)
7582 {
7583 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7584 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7585 gimplify_assign (gpr, t, pre_p);
7586 }
7587
7588 if (needed_sseregs)
7589 {
7590 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7591 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7592 gimplify_assign (fpr, t, pre_p);
7593 }
7594
7595 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7596
7597 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7598 }
7599
7600 /* ... otherwise out of the overflow area. */
7601
7602 /* When the caller aligns a parameter on the stack, an alignment request
7603 beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
7604 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
7605 caller. */
7606 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7607 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7608 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7609
7610 /* Care for on-stack alignment if needed. */
7611 if (arg_boundary <= 64 || size == 0)
7612 t = ovf;
7613 else
7614 {
7615 HOST_WIDE_INT align = arg_boundary / 8;
7616 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7617 size_int (align - 1));
7618 t = fold_convert (sizetype, t);
7619 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7620 size_int (-align));
7621 t = fold_convert (TREE_TYPE (ovf), t);
7622 }
7623
7624 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7625 gimplify_assign (addr, t, pre_p);
7626
7627 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7628 size_int (rsize * UNITS_PER_WORD));
7629 gimplify_assign (unshare_expr (ovf), t, pre_p);
7630
7631 if (container)
7632 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7633
7634 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7635 addr = fold_convert (ptrtype, addr);
7636
7637 if (indirect_p)
7638 addr = build_va_arg_indirect_ref (addr);
7639 return build_va_arg_indirect_ref (addr);
7640 }
7641 \f
7642 /* Return true if OPNUM's MEM should be matched
7643 in movabs* patterns. */
7644
7645 bool
7646 ix86_check_movabs (rtx insn, int opnum)
7647 {
7648 rtx set, mem;
7649
7650 set = PATTERN (insn);
7651 if (GET_CODE (set) == PARALLEL)
7652 set = XVECEXP (set, 0, 0);
7653 gcc_assert (GET_CODE (set) == SET);
7654 mem = XEXP (set, opnum);
7655 while (GET_CODE (mem) == SUBREG)
7656 mem = SUBREG_REG (mem);
7657 gcc_assert (MEM_P (mem));
7658 return volatile_ok || !MEM_VOLATILE_P (mem);
7659 }
7660 \f
7661 /* Initialize the table of extra 80387 mathematical constants. */
7662
7663 static void
7664 init_ext_80387_constants (void)
7665 {
7666 static const char * cst[5] =
7667 {
7668 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7669 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7670 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7671 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7672 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7673 };
7674 int i;
7675
7676 for (i = 0; i < 5; i++)
7677 {
7678 real_from_string (&ext_80387_constants_table[i], cst[i]);
7679 /* Ensure each constant is rounded to XFmode precision. */
7680 real_convert (&ext_80387_constants_table[i],
7681 XFmode, &ext_80387_constants_table[i]);
7682 }
7683
7684 ext_80387_constants_init = 1;
7685 }
7686
7687 /* Return non-zero if the constant is something that
7688 can be loaded with a special instruction. */
7689
7690 int
7691 standard_80387_constant_p (rtx x)
7692 {
7693 enum machine_mode mode = GET_MODE (x);
7694
7695 REAL_VALUE_TYPE r;
7696
7697 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7698 return -1;
7699
7700 if (x == CONST0_RTX (mode))
7701 return 1;
7702 if (x == CONST1_RTX (mode))
7703 return 2;
7704
7705 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7706
7707 /* For XFmode constants, try to find a special 80387 instruction when
7708 optimizing for size or on those CPUs that benefit from them. */
7709 if (mode == XFmode
7710 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7711 {
7712 int i;
7713
7714 if (! ext_80387_constants_init)
7715 init_ext_80387_constants ();
7716
7717 for (i = 0; i < 5; i++)
7718 if (real_identical (&r, &ext_80387_constants_table[i]))
7719 return i + 3;
7720 }
7721
7722 /* A load of the constant -0.0 or -1.0 will be split into a
7723 fldz;fchs or fld1;fchs sequence. */
7724 if (real_isnegzero (&r))
7725 return 8;
7726 if (real_identical (&r, &dconstm1))
7727 return 9;
7728
7729 return 0;
7730 }
7731
7732 /* Return the opcode of the special instruction to be used to load
7733 the constant X. */
7734
7735 const char *
7736 standard_80387_constant_opcode (rtx x)
7737 {
7738 switch (standard_80387_constant_p (x))
7739 {
7740 case 1:
7741 return "fldz";
7742 case 2:
7743 return "fld1";
7744 case 3:
7745 return "fldlg2";
7746 case 4:
7747 return "fldln2";
7748 case 5:
7749 return "fldl2e";
7750 case 6:
7751 return "fldl2t";
7752 case 7:
7753 return "fldpi";
7754 case 8:
7755 case 9:
7756 return "#";
7757 default:
7758 gcc_unreachable ();
7759 }
7760 }
7761
7762 /* Return the CONST_DOUBLE representing the 80387 constant that is
7763 loaded by the specified special instruction. The argument IDX
7764 matches the return value from standard_80387_constant_p. */
7765
7766 rtx
7767 standard_80387_constant_rtx (int idx)
7768 {
7769 int i;
7770
7771 if (! ext_80387_constants_init)
7772 init_ext_80387_constants ();
7773
7774 switch (idx)
7775 {
7776 case 3:
7777 case 4:
7778 case 5:
7779 case 6:
7780 case 7:
7781 i = idx - 3;
7782 break;
7783
7784 default:
7785 gcc_unreachable ();
7786 }
7787
7788 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7789 XFmode);
7790 }
7791
7792 /* Return 1 if X is all 0s and 2 if X is all 1s
7793 in a supported SSE vector mode. */
7794
7795 int
7796 standard_sse_constant_p (rtx x)
7797 {
7798 enum machine_mode mode = GET_MODE (x);
7799
7800 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7801 return 1;
7802 if (vector_all_ones_operand (x, mode))
7803 switch (mode)
7804 {
7805 case V16QImode:
7806 case V8HImode:
7807 case V4SImode:
7808 case V2DImode:
7809 if (TARGET_SSE2)
7810 return 2;
7811 default:
7812 break;
7813 }
7814
7815 return 0;
7816 }
7817
7818 /* Return the opcode of the special instruction to be used to load
7819 the constant X. */
7820
7821 const char *
7822 standard_sse_constant_opcode (rtx insn, rtx x)
7823 {
7824 switch (standard_sse_constant_p (x))
7825 {
7826 case 1:
7827 switch (get_attr_mode (insn))
7828 {
7829 case MODE_V4SF:
7830 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7831 case MODE_V2DF:
7832 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7833 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7834 else
7835 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7836 case MODE_TI:
7837 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7838 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7839 else
7840 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7841 case MODE_V8SF:
7842 return "vxorps\t%x0, %x0, %x0";
7843 case MODE_V4DF:
7844 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7845 return "vxorps\t%x0, %x0, %x0";
7846 else
7847 return "vxorpd\t%x0, %x0, %x0";
7848 case MODE_OI:
7849 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7850 return "vxorps\t%x0, %x0, %x0";
7851 else
7852 return "vpxor\t%x0, %x0, %x0";
7853 default:
7854 break;
7855 }
7856 case 2:
7857 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
7858 default:
7859 break;
7860 }
7861 gcc_unreachable ();
7862 }
7863
7864 /* Returns true if OP contains a symbol reference. */
7865
7866 bool
7867 symbolic_reference_mentioned_p (rtx op)
7868 {
7869 const char *fmt;
7870 int i;
7871
7872 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7873 return true;
7874
7875 fmt = GET_RTX_FORMAT (GET_CODE (op));
7876 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7877 {
7878 if (fmt[i] == 'E')
7879 {
7880 int j;
7881
7882 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7883 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7884 return true;
7885 }
7886
7887 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7888 return true;
7889 }
7890
7891 return false;
7892 }
7893
7894 /* Return true if it is appropriate to emit `ret' instructions in the
7895 body of a function. Do this only if the epilogue is simple, needing a
7896 couple of insns. Prior to reloading, we can't tell how many registers
7897 must be saved, so return false then. Return false if there is no frame
7898 marker to de-allocate. */
7899
7900 bool
7901 ix86_can_use_return_insn_p (void)
7902 {
7903 struct ix86_frame frame;
7904
7905 if (! reload_completed || frame_pointer_needed)
7906 return 0;
7907
7908 /* Don't allow more than 32k pop, since that's all we can do
7909 with one instruction. */
7910 if (crtl->args.pops_args && crtl->args.size >= 32768)
7911 return 0;
7912
7913 ix86_compute_frame_layout (&frame);
7914 return (frame.stack_pointer_offset == UNITS_PER_WORD
7915 && (frame.nregs + frame.nsseregs) == 0);
7916 }
7917 \f
7918 /* Value should be nonzero if functions must have frame pointers.
7919 Zero means the frame pointer need not be set up (and parms may
7920 be accessed via the stack pointer) in functions that seem suitable. */
7921
7922 static bool
7923 ix86_frame_pointer_required (void)
7924 {
7925 /* If we accessed previous frames, then the generated code expects
7926 to be able to access the saved ebp value in our frame. */
7927 if (cfun->machine->accesses_prev_frame)
7928 return true;
7929
7930 /* Several x86 OSes need a frame pointer for other reasons,
7931 usually pertaining to setjmp. */
7932 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7933 return true;
7934
7935 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
7936 turns off the frame pointer by default. Turn it back on now if
7937 we've not got a leaf function. */
7938 if (TARGET_OMIT_LEAF_FRAME_POINTER
7939 && (!current_function_is_leaf
7940 || ix86_current_function_calls_tls_descriptor))
7941 return true;
7942
7943 if (crtl->profile && !flag_fentry)
7944 return true;
7945
7946 return false;
7947 }
7948
7949 /* Record that the current function accesses previous call frames. */
7950
7951 void
7952 ix86_setup_frame_addresses (void)
7953 {
7954 cfun->machine->accesses_prev_frame = 1;
7955 }
7956 \f
7957 #ifndef USE_HIDDEN_LINKONCE
7958 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7959 # define USE_HIDDEN_LINKONCE 1
7960 # else
7961 # define USE_HIDDEN_LINKONCE 0
7962 # endif
7963 #endif
7964
7965 static int pic_labels_used;
7966
7967 /* Fills in the label name that should be used for a pc thunk for
7968 the given register. */
7969
7970 static void
7971 get_pc_thunk_name (char name[32], unsigned int regno)
7972 {
7973 gcc_assert (!TARGET_64BIT);
7974
7975 if (USE_HIDDEN_LINKONCE)
7976 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7977 else
7978 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7979 }
7980
7981
7982 /* This function generates the -fpic PC thunks: each thunk loads its
7983 target register with the return address of the caller and then returns. */
7984
7985 static void
7986 ix86_code_end (void)
7987 {
7988 rtx xops[2];
7989 int regno;
7990
7991 for (regno = AX_REG; regno <= SP_REG; regno++)
7992 {
7993 char name[32];
7994 tree decl;
7995
7996 if (!(pic_labels_used & (1 << regno)))
7997 continue;
7998
7999 get_pc_thunk_name (name, regno);
8000
8001 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8002 get_identifier (name),
8003 build_function_type (void_type_node, void_list_node));
8004 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8005 NULL_TREE, void_type_node);
8006 TREE_PUBLIC (decl) = 1;
8007 TREE_STATIC (decl) = 1;
8008
8009 #if TARGET_MACHO
8010 if (TARGET_MACHO)
8011 {
8012 switch_to_section (darwin_sections[text_coal_section]);
8013 fputs ("\t.weak_definition\t", asm_out_file);
8014 assemble_name (asm_out_file, name);
8015 fputs ("\n\t.private_extern\t", asm_out_file);
8016 assemble_name (asm_out_file, name);
8017 putc ('\n', asm_out_file);
8018 ASM_OUTPUT_LABEL (asm_out_file, name);
8019 DECL_WEAK (decl) = 1;
8020 }
8021 else
8022 #endif
8023 if (USE_HIDDEN_LINKONCE)
8024 {
8025 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8026
8027 targetm.asm_out.unique_section (decl, 0);
8028 switch_to_section (get_named_section (decl, NULL, 0));
8029
8030 targetm.asm_out.globalize_label (asm_out_file, name);
8031 fputs ("\t.hidden\t", asm_out_file);
8032 assemble_name (asm_out_file, name);
8033 putc ('\n', asm_out_file);
8034 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8035 }
8036 else
8037 {
8038 switch_to_section (text_section);
8039 ASM_OUTPUT_LABEL (asm_out_file, name);
8040 }
8041
8042 DECL_INITIAL (decl) = make_node (BLOCK);
8043 current_function_decl = decl;
8044 init_function_start (decl);
8045 first_function_block_is_cold = false;
8046 /* Make sure unwind info is emitted for the thunk if needed. */
8047 final_start_function (emit_barrier (), asm_out_file, 1);
8048
8049 /* Pad stack IP move with 4 instructions (two NOPs count
8050 as one instruction). */
8051 if (TARGET_PAD_SHORT_FUNCTION)
8052 {
8053 int i = 8;
8054
8055 while (i--)
8056 fputs ("\tnop\n", asm_out_file);
8057 }
8058
8059 xops[0] = gen_rtx_REG (Pmode, regno);
8060 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8061 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8062 fputs ("\tret\n", asm_out_file);
8063 final_end_function ();
8064 init_insn_lengths ();
8065 free_after_compilation (cfun);
8066 set_cfun (NULL);
8067 current_function_decl = NULL;
8068 }
8069
8070 if (flag_split_stack)
8071 file_end_indicate_split_stack ();
8072 }
8073
8074 /* Emit code for the SET_GOT patterns. */
8075
8076 const char *
8077 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8078 {
8079 rtx xops[3];
8080
8081 xops[0] = dest;
8082
8083 if (TARGET_VXWORKS_RTP && flag_pic)
8084 {
8085 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8086 xops[2] = gen_rtx_MEM (Pmode,
8087 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8088 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8089
8090 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8091 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8092 an unadorned address. */
8093 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8094 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8095 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8096 return "";
8097 }
8098
8099 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8100
8101 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8102 {
8103 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8104
8105 if (!flag_pic)
8106 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8107 else
8108 {
8109 output_asm_insn ("call\t%a2", xops);
8110 #ifdef DWARF2_UNWIND_INFO
8111 /* The call to the next label acts as a push. */
8112 if (dwarf2out_do_frame ())
8113 {
8114 rtx insn;
8115 start_sequence ();
8116 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8117 gen_rtx_PLUS (Pmode,
8118 stack_pointer_rtx,
8119 GEN_INT (-4))));
8120 RTX_FRAME_RELATED_P (insn) = 1;
8121 dwarf2out_frame_debug (insn, true);
8122 end_sequence ();
8123 }
8124 #endif
8125 }
8126
8127 #if TARGET_MACHO
8128 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8129 is what will be referenced by the Mach-O PIC subsystem. */
8130 if (!label)
8131 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8132 #endif
8133
8134 targetm.asm_out.internal_label (asm_out_file, "L",
8135 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8136
8137 if (flag_pic)
8138 {
8139 output_asm_insn ("pop%z0\t%0", xops);
8140 #ifdef DWARF2_UNWIND_INFO
8141 /* The pop adjusts the stack and clobbers dest, but doesn't restore
8142 dest for unwind info purposes. */
8143 if (dwarf2out_do_frame ())
8144 {
8145 rtx insn;
8146 start_sequence ();
8147 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8148 dwarf2out_frame_debug (insn, true);
8149 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8150 gen_rtx_PLUS (Pmode,
8151 stack_pointer_rtx,
8152 GEN_INT (4))));
8153 RTX_FRAME_RELATED_P (insn) = 1;
8154 dwarf2out_frame_debug (insn, true);
8155 end_sequence ();
8156 }
8157 #endif
8158 }
8159 }
8160 else
8161 {
8162 char name[32];
8163 get_pc_thunk_name (name, REGNO (dest));
8164 pic_labels_used |= 1 << REGNO (dest);
8165
8166 #ifdef DWARF2_UNWIND_INFO
8167 /* Ensure all queued register saves are flushed before the
8168 call. */
8169 if (dwarf2out_do_frame ())
8170 dwarf2out_flush_queued_reg_saves ();
8171 #endif
8172 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8173 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8174 output_asm_insn ("call\t%X2", xops);
8175 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8176 is what will be referenced by the Mach-O PIC subsystem. */
8177 #if TARGET_MACHO
8178 if (!label)
8179 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8180 else
8181 targetm.asm_out.internal_label (asm_out_file, "L",
8182 CODE_LABEL_NUMBER (label));
8183 #endif
8184 }
8185
8186 if (TARGET_MACHO)
8187 return "";
8188
8189 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8190 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8191 else
8192 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
8193
8194 return "";
8195 }
8196
8197 /* Generate a "push" pattern for input ARG. */
8198
8199 static rtx
8200 gen_push (rtx arg)
8201 {
8202 struct machine_function *m = cfun->machine;
8203
8204 if (m->fs.cfa_reg == stack_pointer_rtx)
8205 m->fs.cfa_offset += UNITS_PER_WORD;
8206 m->fs.sp_offset += UNITS_PER_WORD;
8207
8208 return gen_rtx_SET (VOIDmode,
8209 gen_rtx_MEM (Pmode,
8210 gen_rtx_PRE_DEC (Pmode,
8211 stack_pointer_rtx)),
8212 arg);
8213 }
8214
8215 /* Generate a "pop" pattern for input ARG. */
8216
8217 static rtx
8218 gen_pop (rtx arg)
8219 {
8220 return gen_rtx_SET (VOIDmode,
8221 arg,
8222 gen_rtx_MEM (Pmode,
8223 gen_rtx_POST_INC (Pmode,
8224 stack_pointer_rtx)));
8225 }
8226
8227 /* Return the number of an unused call-clobbered register available for
8228 the entire function, or INVALID_REGNUM if there is none. */
8229
8230 static unsigned int
8231 ix86_select_alt_pic_regnum (void)
8232 {
8233 if (current_function_is_leaf
8234 && !crtl->profile
8235 && !ix86_current_function_calls_tls_descriptor)
8236 {
8237 int i, drap;
8238 /* Can't use the same register for both PIC and DRAP. */
8239 if (crtl->drap_reg)
8240 drap = REGNO (crtl->drap_reg);
8241 else
8242 drap = -1;
8243 for (i = 2; i >= 0; --i)
8244 if (i != drap && !df_regs_ever_live_p (i))
8245 return i;
8246 }
8247
8248 return INVALID_REGNUM;
8249 }
8250
8251 /* Return 1 if we need to save REGNO. */
8252 static int
8253 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8254 {
8255 if (pic_offset_table_rtx
8256 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8257 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8258 || crtl->profile
8259 || crtl->calls_eh_return
8260 || crtl->uses_const_pool))
8261 {
8262 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8263 return 0;
8264 return 1;
8265 }
8266
8267 if (crtl->calls_eh_return && maybe_eh_return)
8268 {
8269 unsigned i;
8270 for (i = 0; ; i++)
8271 {
8272 unsigned test = EH_RETURN_DATA_REGNO (i);
8273 if (test == INVALID_REGNUM)
8274 break;
8275 if (test == regno)
8276 return 1;
8277 }
8278 }
8279
8280 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8281 return 1;
8282
8283 return (df_regs_ever_live_p (regno)
8284 && !call_used_regs[regno]
8285 && !fixed_regs[regno]
8286 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8287 }
8288
8289 /* Return the number of saved general purpose registers. */
8290
8291 static int
8292 ix86_nsaved_regs (void)
8293 {
8294 int nregs = 0;
8295 int regno;
8296
8297 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8298 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8299 nregs ++;
8300 return nregs;
8301 }
8302
8303 /* Return the number of saved SSE registers. */
8304
8305 static int
8306 ix86_nsaved_sseregs (void)
8307 {
8308 int nregs = 0;
8309 int regno;
8310
8311 if (ix86_cfun_abi () != MS_ABI)
8312 return 0;
8313 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8314 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8315 nregs ++;
8316 return nregs;
8317 }
8318
8319 /* Given FROM and TO register numbers, say whether this elimination is
8320 allowed. If stack alignment is needed, we can only replace argument
8321 pointer with hard frame pointer, or replace frame pointer with stack
8322 pointer. Otherwise, frame pointer elimination is automatically
8323 handled and all other eliminations are valid. */
8324
8325 static bool
8326 ix86_can_eliminate (const int from, const int to)
8327 {
8328 if (stack_realign_fp)
8329 return ((from == ARG_POINTER_REGNUM
8330 && to == HARD_FRAME_POINTER_REGNUM)
8331 || (from == FRAME_POINTER_REGNUM
8332 && to == STACK_POINTER_REGNUM));
8333 else
8334 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8335 }
8336
8337 /* Return the offset between two registers, one to be eliminated, and the other
8338 its replacement, at the start of a routine. */
8339
8340 HOST_WIDE_INT
8341 ix86_initial_elimination_offset (int from, int to)
8342 {
8343 struct ix86_frame frame;
8344 ix86_compute_frame_layout (&frame);
8345
8346 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8347 return frame.hard_frame_pointer_offset;
8348 else if (from == FRAME_POINTER_REGNUM
8349 && to == HARD_FRAME_POINTER_REGNUM)
8350 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8351 else
8352 {
8353 gcc_assert (to == STACK_POINTER_REGNUM);
8354
8355 if (from == ARG_POINTER_REGNUM)
8356 return frame.stack_pointer_offset;
8357
8358 gcc_assert (from == FRAME_POINTER_REGNUM);
8359 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8360 }
8361 }
8362
8363 /* In a dynamically-aligned function, we can't know the offset from
8364 stack pointer to frame pointer, so we must ensure that setjmp
8365 eliminates fp against the hard fp (%ebp) rather than trying to
8366 index from %esp up to the top of the frame across a gap that is
8367 of unknown (at compile-time) size. */
8368 static rtx
8369 ix86_builtin_setjmp_frame_value (void)
8370 {
8371 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8372 }
8373
8374 /* On the x86 -fsplit-stack and -fstack-protector both use the same
8375 field in the TCB, so they cannot be used together. */
8376
8377 static bool
8378 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED)
8379 {
8380 bool ret = true;
8381
8382 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
8383 if (report)
8384 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
8385 ret = false;
8386 #else
8387 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
8388 {
8389 if (report)
8390 error ("%<-fsplit-stack%> requires "
8391 "assembler support for CFI directives");
8392 ret = false;
8393 }
8394 #endif
8395
8396 return ret;
8397 }
8398
8399 /* When using -fsplit-stack, the allocation routines set a field in
8400 the TCB to the bottom of the stack plus this much space, measured
8401 in bytes. */
8402
8403 #define SPLIT_STACK_AVAILABLE 256
8404
8405 /* Fill structure ix86_frame with the frame layout of the current function. */
8406
8407 static void
8408 ix86_compute_frame_layout (struct ix86_frame *frame)
8409 {
8410 unsigned int stack_alignment_needed;
8411 HOST_WIDE_INT offset;
8412 unsigned int preferred_alignment;
8413 HOST_WIDE_INT size = get_frame_size ();
8414 HOST_WIDE_INT to_allocate;
8415
8416 frame->nregs = ix86_nsaved_regs ();
8417 frame->nsseregs = ix86_nsaved_sseregs ();
8418
8419 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8420 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8421
8422 /* The MS ABI seems to require the stack alignment to always be 16, except
8423 for function prologues and leaf functions. */
8424 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8425 && (!current_function_is_leaf || cfun->calls_alloca != 0
8426 || ix86_current_function_calls_tls_descriptor))
8427 {
8428 preferred_alignment = 16;
8429 stack_alignment_needed = 16;
8430 crtl->preferred_stack_boundary = 128;
8431 crtl->stack_alignment_needed = 128;
8432 }
8433
8434 gcc_assert (!size || stack_alignment_needed);
8435 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8436 gcc_assert (preferred_alignment <= stack_alignment_needed);
8437
8438 /* During reload iteration the number of registers saved can change.
8439 Recompute the value as needed. Do not recompute when the number of
8440 registers didn't change, as reload makes multiple calls to this function
8441 and does not expect the decision to change within a single iteration. */
8442 if (!optimize_function_for_size_p (cfun)
8443 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8444 {
8445 int count = frame->nregs;
8446 struct cgraph_node *node = cgraph_node (current_function_decl);
8447
8448 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8449 /* The fast prologue uses move instead of push to save registers. This
8450 is significantly longer, but also executes faster as modern hardware
8451 can execute the moves in parallel, but can't do that for push/pop.
8452
8453 Be careful about choosing which prologue to emit: when the function takes
8454 many instructions to execute, we may use the slow version, and likewise
8455 when the function is known to be outside a hot spot (this is known only
8456 with profile feedback). Weight the size of the function by the number of
8457 registers to save, as it is cheap to use one or two push instructions
8458 but very slow to use many of them. */
8459 if (count)
8460 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8461 if (node->frequency < NODE_FREQUENCY_NORMAL
8462 || (flag_branch_probabilities
8463 && node->frequency < NODE_FREQUENCY_HOT))
8464 cfun->machine->use_fast_prologue_epilogue = false;
8465 else
8466 cfun->machine->use_fast_prologue_epilogue
8467 = !expensive_function_p (count);
8468 }
8469 if (TARGET_PROLOGUE_USING_MOVE
8470 && cfun->machine->use_fast_prologue_epilogue)
8471 frame->save_regs_using_mov = true;
8472 else
8473 frame->save_regs_using_mov = false;
8474
8475 /* If static stack checking is enabled and done with probes, the registers
8476 need to be saved before allocating the frame. */
8477 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8478 frame->save_regs_using_mov = false;
8479
8480 /* Skip return address. */
8481 offset = UNITS_PER_WORD;
8482
8483 /* Skip pushed static chain. */
8484 if (ix86_static_chain_on_stack)
8485 offset += UNITS_PER_WORD;
8486
8487 /* Skip saved base pointer. */
8488 if (frame_pointer_needed)
8489 offset += UNITS_PER_WORD;
8490
8491 frame->hard_frame_pointer_offset = offset;
8492
8493 /* Register save area */
8494 offset += frame->nregs * UNITS_PER_WORD;
8495 frame->reg_save_offset = offset;
8496
8497 /* Align and set SSE register save area. */
8498 if (frame->nsseregs)
8499 {
8500 /* The only ABI that has saved SSE registers (Win64) also has a
8501 16-byte aligned default stack, and thus we don't need to be
8502 within the re-aligned local stack frame to save them. */
8503 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8504 offset = (offset + 16 - 1) & -16;
8505 offset += frame->nsseregs * 16;
8506 }
8507 frame->sse_reg_save_offset = offset;
8508
8509 /* The re-aligned stack starts here. Values before this point are not
8510 directly comparable with values below this point. In order to make
8511 sure that no value happens to be the same before and after, force
8512 the alignment computation below to add a non-zero value. */
8513 if (stack_realign_fp)
8514 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8515
8516 /* Va-arg area */
8517 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8518 offset += frame->va_arg_size;
8519
8520 /* Align start of frame for local function. */
8521 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8522
8523 /* Frame pointer points here. */
8524 frame->frame_pointer_offset = offset;
8525
8526 offset += size;
8527
8528 /* Add the outgoing arguments area. It can be skipped if we eliminated
8529 all the function calls as dead code.
8530 Skipping is, however, impossible when the function calls alloca: the
8531 alloca expander assumes that the last crtl->outgoing_args_size bytes
8532 of the stack frame are unused. */
8533 if (ACCUMULATE_OUTGOING_ARGS
8534 && (!current_function_is_leaf || cfun->calls_alloca
8535 || ix86_current_function_calls_tls_descriptor))
8536 {
8537 offset += crtl->outgoing_args_size;
8538 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8539 }
8540 else
8541 frame->outgoing_arguments_size = 0;
8542
8543 /* Align stack boundary. Only needed if we're calling another function
8544 or using alloca. */
8545 if (!current_function_is_leaf || cfun->calls_alloca
8546 || ix86_current_function_calls_tls_descriptor)
8547 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8548
8549 /* We've reached end of stack frame. */
8550 frame->stack_pointer_offset = offset;
8551
8552 /* The size the prologue needs to allocate. */
8553 to_allocate = offset - frame->sse_reg_save_offset;
8554
8555 if ((!to_allocate && frame->nregs <= 1)
8556 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8557 frame->save_regs_using_mov = false;
8558
8559 if (ix86_using_red_zone ()
8560 && current_function_sp_is_unchanging
8561 && current_function_is_leaf
8562 && !ix86_current_function_calls_tls_descriptor)
8563 {
8564 frame->red_zone_size = to_allocate;
8565 if (frame->save_regs_using_mov)
8566 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8567 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8568 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8569 }
8570 else
8571 frame->red_zone_size = 0;
8572 frame->stack_pointer_offset -= frame->red_zone_size;
8573 }
8574
8575 /* This is semi-inlined memory_address_length, but simplified
8576 since we know that we're always dealing with reg+offset, and
8577 to avoid having to create and discard all that rtl. */
8578
8579 static inline int
8580 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8581 {
8582 int len = 4;
8583
8584 if (offset == 0)
8585 {
8586 /* EBP and R13 cannot be encoded without an offset. */
8587 len = (regno == BP_REG || regno == R13_REG);
8588 }
8589 else if (IN_RANGE (offset, -128, 127))
8590 len = 1;
8591
8592 /* ESP and R12 must be encoded with a SIB byte. */
8593 if (regno == SP_REG || regno == R12_REG)
8594 len++;
8595
8596 return len;
8597 }
8598
8599 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8600 The valid base registers are taken from CFUN->MACHINE->FS. */
8601
8602 static rtx
8603 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8604 {
8605 const struct machine_function *m = cfun->machine;
8606 rtx base_reg = NULL;
8607 HOST_WIDE_INT base_offset = 0;
8608
8609 if (m->use_fast_prologue_epilogue)
8610 {
8611 /* Choose the base register most likely to allow the most scheduling
8612 opportunities. Generally FP is valid throughout the function,
8613 while DRAP must be reloaded within the epilogue. But choose either
8614 over the SP due to its increased encoding size. */
8615
8616 if (m->fs.fp_valid)
8617 {
8618 base_reg = hard_frame_pointer_rtx;
8619 base_offset = m->fs.fp_offset - cfa_offset;
8620 }
8621 else if (m->fs.drap_valid)
8622 {
8623 base_reg = crtl->drap_reg;
8624 base_offset = 0 - cfa_offset;
8625 }
8626 else if (m->fs.sp_valid)
8627 {
8628 base_reg = stack_pointer_rtx;
8629 base_offset = m->fs.sp_offset - cfa_offset;
8630 }
8631 }
8632 else
8633 {
8634 HOST_WIDE_INT toffset;
8635 int len = 16, tlen;
8636
8637 /* Choose the base register with the smallest address encoding.
8638 With a tie, choose FP > DRAP > SP. */
8639 if (m->fs.sp_valid)
8640 {
8641 base_reg = stack_pointer_rtx;
8642 base_offset = m->fs.sp_offset - cfa_offset;
8643 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8644 }
8645 if (m->fs.drap_valid)
8646 {
8647 toffset = 0 - cfa_offset;
8648 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8649 if (tlen <= len)
8650 {
8651 base_reg = crtl->drap_reg;
8652 base_offset = toffset;
8653 len = tlen;
8654 }
8655 }
8656 if (m->fs.fp_valid)
8657 {
8658 toffset = m->fs.fp_offset - cfa_offset;
8659 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8660 if (tlen <= len)
8661 {
8662 base_reg = hard_frame_pointer_rtx;
8663 base_offset = toffset;
8664 len = tlen;
8665 }
8666 }
8667 }
8668 gcc_assert (base_reg != NULL);
8669
8670 return plus_constant (base_reg, base_offset);
8671 }
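
/* Illustrative sketch (not part of the build): every candidate base in
   choose_baseaddr satisfies base_reg + displacement == CFA - cfa_offset,
   because each *_offset field records how far below the CFA that register
   points.  With hypothetical offsets fp_offset == 16 and sp_offset == 48,
   the slot at CFA - 16 is 0(%rbp) or 32(%rsp), and the length comparison
   above picks the cheaper encoding.  */
#if 0
static long
example_base_disp (long reg_offset_below_cfa, long cfa_offset)
{
  /* The returned displacement D makes base_reg + D == CFA - cfa_offset.  */
  return reg_offset_below_cfa - cfa_offset;
}
#endif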
8672
8673 /* Emit code to save registers in the prologue. */
8674
8675 static void
8676 ix86_emit_save_regs (void)
8677 {
8678 unsigned int regno;
8679 rtx insn;
8680
8681 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8682 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8683 {
8684 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8685 RTX_FRAME_RELATED_P (insn) = 1;
8686 }
8687 }
8688
8689 /* Emit a single register save at CFA - CFA_OFFSET. */
8690
8691 static void
8692 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8693 HOST_WIDE_INT cfa_offset)
8694 {
8695 struct machine_function *m = cfun->machine;
8696 rtx reg = gen_rtx_REG (mode, regno);
8697 rtx mem, addr, base, insn;
8698
8699 addr = choose_baseaddr (cfa_offset);
8700 mem = gen_frame_mem (mode, addr);
8701
8702 /* For SSE saves, we need to indicate the 128-bit alignment. */
8703 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8704
8705 insn = emit_move_insn (mem, reg);
8706 RTX_FRAME_RELATED_P (insn) = 1;
8707
8708 base = addr;
8709 if (GET_CODE (base) == PLUS)
8710 base = XEXP (base, 0);
8711 gcc_checking_assert (REG_P (base));
8712
8713 /* When saving registers into a re-aligned local stack frame, avoid
8714 any tricky guessing by dwarf2out. */
8715 if (m->fs.realigned)
8716 {
8717 gcc_checking_assert (stack_realign_drap);
8718
8719 if (regno == REGNO (crtl->drap_reg))
8720 {
8721 /* A bit of a hack. We force the DRAP register to be saved in
8722 the re-aligned stack frame, which provides us with a copy
8723 of the CFA that will last past the prologue. Install it. */
8724 gcc_checking_assert (cfun->machine->fs.fp_valid);
8725 addr = plus_constant (hard_frame_pointer_rtx,
8726 cfun->machine->fs.fp_offset - cfa_offset);
8727 mem = gen_rtx_MEM (mode, addr);
8728 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
8729 }
8730 else
8731 {
8732 /* The frame pointer is a stable reference within the
8733 aligned frame. Use it. */
8734 gcc_checking_assert (cfun->machine->fs.fp_valid);
8735 addr = plus_constant (hard_frame_pointer_rtx,
8736 cfun->machine->fs.fp_offset - cfa_offset);
8737 mem = gen_rtx_MEM (mode, addr);
8738 add_reg_note (insn, REG_CFA_EXPRESSION,
8739 gen_rtx_SET (VOIDmode, mem, reg));
8740 }
8741 }
8742
8743 /* The memory may not be relative to the current CFA register,
8744 which means that we may need to generate a new pattern for
8745 use by the unwind info. */
8746 else if (base != m->fs.cfa_reg)
8747 {
8748 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
8749 mem = gen_rtx_MEM (mode, addr);
8750 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
8751 }
8752 }
8753
8754 /* Emit code to save registers using MOV insns.
8755 First register is stored at CFA - CFA_OFFSET. */
8756 static void
8757 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
8758 {
8759 unsigned int regno;
8760
8761 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8762 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8763 {
8764 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
8765 cfa_offset -= UNITS_PER_WORD;
8766 }
8767 }
8768
8769 /* Emit code to save SSE registers using MOV insns.
8770 First register is stored at CFA - CFA_OFFSET. */
8771 static void
8772 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
8773 {
8774 unsigned int regno;
8775
8776 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8777 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8778 {
8779 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
8780 cfa_offset -= 16;
8781 }
8782 }
8783
8784 static GTY(()) rtx queued_cfa_restores;
8785
8786 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
8787 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
8788 Don't add the note if the previously saved value will be left untouched
8789 within the stack red zone until return, as unwinders can find the same value
8790 in the register and on the stack. */
8791
8792 static void
8793 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
8794 {
8795 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
8796 return;
8797
8798 if (insn)
8799 {
8800 add_reg_note (insn, REG_CFA_RESTORE, reg);
8801 RTX_FRAME_RELATED_P (insn) = 1;
8802 }
8803 else
8804 queued_cfa_restores
8805 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8806 }
8807
8808 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8809
8810 static void
8811 ix86_add_queued_cfa_restore_notes (rtx insn)
8812 {
8813 rtx last;
8814 if (!queued_cfa_restores)
8815 return;
8816 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8817 ;
8818 XEXP (last, 1) = REG_NOTES (insn);
8819 REG_NOTES (insn) = queued_cfa_restores;
8820 queued_cfa_restores = NULL_RTX;
8821 RTX_FRAME_RELATED_P (insn) = 1;
8822 }
8823
8824 /* Expand prologue or epilogue stack adjustment.
8825 The pattern exists to put a dependency on all ebp-based memory accesses.
8826 STYLE should be negative if instructions should be marked as frame related,
8827 zero if the %r11 register is live and cannot be freely used, and positive
8828 otherwise. */
8829
8830 static void
8831 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8832 int style, bool set_cfa)
8833 {
8834 struct machine_function *m = cfun->machine;
8835 rtx insn;
8836
8837 if (! TARGET_64BIT)
8838 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
8839 else if (x86_64_immediate_operand (offset, DImode))
8840 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
8841 else
8842 {
8843 rtx tmp;
8844 /* r11 is used by indirect sibcall return as well, set before the
8845 epilogue and used after the epilogue. */
8846 if (style)
8847 tmp = gen_rtx_REG (DImode, R11_REG);
8848 else
8849 {
8850 gcc_assert (src != hard_frame_pointer_rtx
8851 && dest != hard_frame_pointer_rtx);
8852 tmp = hard_frame_pointer_rtx;
8853 }
8854 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8855 if (style < 0)
8856 RTX_FRAME_RELATED_P (insn) = 1;
8857
8858 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
8859 }
8860
8861 insn = emit_insn (insn);
8862 if (style >= 0)
8863 ix86_add_queued_cfa_restore_notes (insn);
8864
8865 if (set_cfa)
8866 {
8867 rtx r;
8868
8869 gcc_assert (m->fs.cfa_reg == src);
8870 m->fs.cfa_offset += INTVAL (offset);
8871 m->fs.cfa_reg = dest;
8872
8873 r = gen_rtx_PLUS (Pmode, src, offset);
8874 r = gen_rtx_SET (VOIDmode, dest, r);
8875 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8876 RTX_FRAME_RELATED_P (insn) = 1;
8877 }
8878 else if (style < 0)
8879 RTX_FRAME_RELATED_P (insn) = 1;
8880
8881 if (dest == stack_pointer_rtx)
8882 {
8883 HOST_WIDE_INT ooffset = m->fs.sp_offset;
8884 bool valid = m->fs.sp_valid;
8885
8886 if (src == hard_frame_pointer_rtx)
8887 {
8888 valid = m->fs.fp_valid;
8889 ooffset = m->fs.fp_offset;
8890 }
8891 else if (src == crtl->drap_reg)
8892 {
8893 valid = m->fs.drap_valid;
8894 ooffset = 0;
8895 }
8896 else
8897 {
8898 /* Else there are two possibilities: SP itself, which we set
8899 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
8900 handled by hand along the eh_return path. */
8901 gcc_checking_assert (src == stack_pointer_rtx
8902 || offset == const0_rtx);
8903 }
8904
8905 m->fs.sp_offset = ooffset - INTVAL (offset);
8906 m->fs.sp_valid = valid;
8907 }
8908 }
8909
8910 /* Find an available register to be used as the dynamic realign argument
8911 pointer register. Such a register will be written in the prologue and
8912 used at the beginning of the body, so it must not be
8913 1. a parameter passing register.
8914 2. the GOT pointer.
8915 We reuse the static-chain register if it is available. Otherwise, we
8916 use DI for i386 and R13 for x86-64. We chose R13 since it has a
8917 shorter encoding.
8918 
8919 Return: the regno of the chosen register. */
8920
8921 static unsigned int
8922 find_drap_reg (void)
8923 {
8924 tree decl = cfun->decl;
8925
8926 if (TARGET_64BIT)
8927 {
8928 /* Use R13 for a nested function or a function that needs a static chain.
8929 Since a function with a tail call may use any caller-saved
8930 registers in the epilogue, DRAP must not use a caller-saved
8931 register in that case. */
8932 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8933 return R13_REG;
8934
8935 return R10_REG;
8936 }
8937 else
8938 {
8939 /* Use DI for a nested function or a function that needs a static chain.
8940 Since a function with a tail call may use any caller-saved
8941 registers in the epilogue, DRAP must not use a caller-saved
8942 register in that case. */
8943 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8944 return DI_REG;
8945
8946 /* Reuse static chain register if it isn't used for parameter
8947 passing. */
8948 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8949 && !lookup_attribute ("fastcall",
8950 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8951 && !lookup_attribute ("thiscall",
8952 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8953 return CX_REG;
8954 else
8955 return DI_REG;
8956 }
8957 }
8958
8959 /* Return minimum incoming stack alignment. */
8960
8961 static unsigned int
8962 ix86_minimum_incoming_stack_boundary (bool sibcall)
8963 {
8964 unsigned int incoming_stack_boundary;
8965
8966 /* Prefer the one specified at command line. */
8967 if (ix86_user_incoming_stack_boundary)
8968 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8969 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
8970 if -mstackrealign is used, this isn't a sibcall check, and the
8971 estimated stack alignment is 128 bits. */
8972 else if (!sibcall
8973 && !TARGET_64BIT
8974 && ix86_force_align_arg_pointer
8975 && crtl->stack_alignment_estimated == 128)
8976 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8977 else
8978 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8979
8980 /* Incoming stack alignment can be changed on individual functions
8981 via force_align_arg_pointer attribute. We use the smallest
8982 incoming stack boundary. */
8983 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8984 && lookup_attribute (ix86_force_align_arg_pointer_string,
8985 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8986 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8987
8988 /* The incoming stack frame has to be aligned at least at
8989 parm_stack_boundary. */
8990 if (incoming_stack_boundary < crtl->parm_stack_boundary)
8991 incoming_stack_boundary = crtl->parm_stack_boundary;
8992
8993 /* The stack at the entry of main is aligned by the runtime. We use the
8994 smallest incoming stack boundary. */
8995 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8996 && DECL_NAME (current_function_decl)
8997 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8998 && DECL_FILE_SCOPE_P (current_function_decl))
8999 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9000
9001 return incoming_stack_boundary;
9002 }
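
/* Illustrative sketch (not part of the build): the function above is a
   sequence of clamps on the incoming boundary.  With hypothetical values
   (128-bit default, 64-bit parameter boundary, 32-bit minimum, 128-bit
   boundary guaranteed for main), a plain function keeps 128, a function
   with the force_align_arg_pointer attribute drops to 32 and is then
   raised back to the 64-bit parameter floor, and main is never assumed to
   be aligned beyond what the runtime guarantees.  */
#if 0
static unsigned int
example_incoming_boundary (unsigned int preferred, unsigned int parm_b,
			   unsigned int min_b, unsigned int main_b,
			   int force_align_attr, int is_main)
{
  unsigned int b = preferred;

  if (force_align_attr && b > min_b)
    b = min_b;			/* the attribute lowers it to the minimum */
  if (b < parm_b)
    b = parm_b;			/* but never below the parameter boundary */
  if (is_main && b > main_b)
    b = main_b;			/* main only gets what the runtime aligns */
  return b;
}
#endif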
9003
9004 /* Update incoming stack boundary and estimated stack alignment. */
9005
9006 static void
9007 ix86_update_stack_boundary (void)
9008 {
9009 ix86_incoming_stack_boundary
9010 = ix86_minimum_incoming_stack_boundary (false);
9011
9012 /* x86_64 varargs needs 16-byte stack alignment for the register save
9013 area. */
9014 if (TARGET_64BIT
9015 && cfun->stdarg
9016 && crtl->stack_alignment_estimated < 128)
9017 crtl->stack_alignment_estimated = 128;
9018 }
9019
9020 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9021 needed or an rtx for DRAP otherwise. */
9022
9023 static rtx
9024 ix86_get_drap_rtx (void)
9025 {
9026 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9027 crtl->need_drap = true;
9028
9029 if (stack_realign_drap)
9030 {
9031 /* Assign DRAP to vDRAP and return vDRAP. */
9032 unsigned int regno = find_drap_reg ();
9033 rtx drap_vreg;
9034 rtx arg_ptr;
9035 rtx seq, insn;
9036
9037 arg_ptr = gen_rtx_REG (Pmode, regno);
9038 crtl->drap_reg = arg_ptr;
9039
9040 start_sequence ();
9041 drap_vreg = copy_to_reg (arg_ptr);
9042 seq = get_insns ();
9043 end_sequence ();
9044
9045 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9046 if (!optimize)
9047 {
9048 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9049 RTX_FRAME_RELATED_P (insn) = 1;
9050 }
9051 return drap_vreg;
9052 }
9053 else
9054 return NULL;
9055 }
9056
9057 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9058
9059 static rtx
9060 ix86_internal_arg_pointer (void)
9061 {
9062 return virtual_incoming_args_rtx;
9063 }
9064
9065 struct scratch_reg {
9066 rtx reg;
9067 bool saved;
9068 };
9069
9070 /* Return a short-lived scratch register for use on function entry.
9071 In 32-bit mode, it is valid only after the registers are saved
9072 in the prologue. This register must be released by means of
9073 release_scratch_register_on_entry once it is dead. */
9074
9075 static void
9076 get_scratch_register_on_entry (struct scratch_reg *sr)
9077 {
9078 int regno;
9079
9080 sr->saved = false;
9081
9082 if (TARGET_64BIT)
9083 {
9084 /* We always use R11 in 64-bit mode. */
9085 regno = R11_REG;
9086 }
9087 else
9088 {
9089 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9090 bool fastcall_p
9091 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9092 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9093 int regparm = ix86_function_regparm (fntype, decl);
9094 int drap_regno
9095 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9096
9097 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9098 for the static chain register. */
9099 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9100 && drap_regno != AX_REG)
9101 regno = AX_REG;
9102 else if (regparm < 2 && drap_regno != DX_REG)
9103 regno = DX_REG;
9104 /* ecx is the static chain register. */
9105 else if (regparm < 3 && !fastcall_p && !static_chain_p
9106 && drap_regno != CX_REG)
9107 regno = CX_REG;
9108 else if (ix86_save_reg (BX_REG, true))
9109 regno = BX_REG;
9110 /* esi is the static chain register. */
9111 else if (!(regparm == 3 && static_chain_p)
9112 && ix86_save_reg (SI_REG, true))
9113 regno = SI_REG;
9114 else if (ix86_save_reg (DI_REG, true))
9115 regno = DI_REG;
9116 else
9117 {
9118 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9119 sr->saved = true;
9120 }
9121 }
9122
9123 sr->reg = gen_rtx_REG (Pmode, regno);
9124 if (sr->saved)
9125 {
9126 rtx insn = emit_insn (gen_push (sr->reg));
9127 RTX_FRAME_RELATED_P (insn) = 1;
9128 }
9129 }
9130
9131 /* Release a scratch register obtained from the preceding function. */
9132
9133 static void
9134 release_scratch_register_on_entry (struct scratch_reg *sr)
9135 {
9136 if (sr->saved)
9137 {
9138 rtx x, insn = emit_insn (gen_pop (sr->reg));
9139
9140 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9141 RTX_FRAME_RELATED_P (insn) = 1;
9142 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9143 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9144 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9145 }
9146 }
9147
9148 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9149
9150 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9151
9152 static void
9153 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9154 {
9155 /* We skip the probe for the first interval + a small dope of 4 words and
9156 probe that many bytes past the specified size to maintain a protection
9157 area at the bottom of the stack. */
9158 const int dope = 4 * UNITS_PER_WORD;
9159 rtx size_rtx = GEN_INT (size);
9160
9161 /* See if we have a constant small number of probes to generate. If so,
9162 that's the easy case. The run-time loop is made up of 11 insns in the
9163 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9164 for n # of intervals. */
9165 if (size <= 5 * PROBE_INTERVAL)
9166 {
9167 HOST_WIDE_INT i, adjust;
9168 bool first_probe = true;
9169
9170 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9171 values of N from 1 until it exceeds SIZE. If only one probe is
9172 needed, this will not generate any code. Then adjust and probe
9173 to PROBE_INTERVAL + SIZE. */
9174 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9175 {
9176 if (first_probe)
9177 {
9178 adjust = 2 * PROBE_INTERVAL + dope;
9179 first_probe = false;
9180 }
9181 else
9182 adjust = PROBE_INTERVAL;
9183
9184 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9185 plus_constant (stack_pointer_rtx, -adjust)));
9186 emit_stack_probe (stack_pointer_rtx);
9187 }
9188
9189 if (first_probe)
9190 adjust = size + PROBE_INTERVAL + dope;
9191 else
9192 adjust = size + PROBE_INTERVAL - i;
9193
9194 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9195 plus_constant (stack_pointer_rtx, -adjust)));
9196 emit_stack_probe (stack_pointer_rtx);
9197
9198 /* Adjust back to account for the additional first interval. */
9199 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9200 plus_constant (stack_pointer_rtx,
9201 PROBE_INTERVAL + dope)));
9202 }
9203
9204 /* Otherwise, do the same as above, but in a loop. Note that we must be
9205 extra careful with variables wrapping around because we might be at
9206 the very top (or the very bottom) of the address space and we have
9207 to be able to handle this case properly; in particular, we use an
9208 equality test for the loop condition. */
9209 else
9210 {
9211 HOST_WIDE_INT rounded_size;
9212 struct scratch_reg sr;
9213
9214 get_scratch_register_on_entry (&sr);
9215
9216
9217 /* Step 1: round SIZE to the previous multiple of the interval. */
9218
9219 rounded_size = size & -PROBE_INTERVAL;
9220
9221
9222 /* Step 2: compute initial and final value of the loop counter. */
9223
9224 /* SP = SP_0 + PROBE_INTERVAL. */
9225 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9226 plus_constant (stack_pointer_rtx,
9227 - (PROBE_INTERVAL + dope))));
9228
9229 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9230 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9231 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9232 gen_rtx_PLUS (Pmode, sr.reg,
9233 stack_pointer_rtx)));
9234
9235
9236 /* Step 3: the loop
9237
9238 while (SP != LAST_ADDR)
9239 {
9240 SP = SP + PROBE_INTERVAL
9241 probe at SP
9242 }
9243
9244 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9245 values of N from 1 until it is equal to ROUNDED_SIZE. */
9246
9247 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9248
9249
9250 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9251 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9252
9253 if (size != rounded_size)
9254 {
9255 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9256 plus_constant (stack_pointer_rtx,
9257 rounded_size - size)));
9258 emit_stack_probe (stack_pointer_rtx);
9259 }
9260
9261 /* Adjust back to account for the additional first interval. */
9262 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9263 plus_constant (stack_pointer_rtx,
9264 PROBE_INTERVAL + dope)));
9265
9266 release_scratch_register_on_entry (&sr);
9267 }
9268
9269 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9270 cfun->machine->fs.sp_offset += size;
9271
9272 /* Make sure nothing is scheduled before we are done. */
9273 emit_insn (gen_blockage ());
9274 }
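
/* Illustrative sketch (not part of the build): for the small constant case
   above, the stack adjustments are easy to enumerate.  With hypothetical
   values PROBE_INTERVAL == 4096, dope == 32 and SIZE == 10000, the code
   emits "sub 8224; probe" (2*interval + dope), "sub 4096; probe", the tail
   "sub 1808; probe" (size + interval - i), and finally "add 4128" to give
   back the extra interval + dope, for a net change of exactly -SIZE.  The
   helper below mirrors that accounting.  */
#if 0
static long
example_constant_probe_total (long size)
{
  const long interval = 4096, dope = 32;	/* hypothetical values */
  long i, total = 0;
  int first_probe = 1;

  for (i = interval; i < size; i += interval)
    {
      total += first_probe ? 2 * interval + dope : interval;
      first_probe = 0;
    }
  total += first_probe ? size + interval + dope : size + interval - i;
  total -= interval + dope;			/* the final add back */
  return total;					/* always equals SIZE */
}
#endif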
9275
9276 /* Adjust the stack pointer up to REG while probing it. */
9277
9278 const char *
9279 output_adjust_stack_and_probe (rtx reg)
9280 {
9281 static int labelno = 0;
9282 char loop_lab[32], end_lab[32];
9283 rtx xops[2];
9284
9285 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9286 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9287
9288 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9289
9290 /* Jump to END_LAB if SP == LAST_ADDR. */
9291 xops[0] = stack_pointer_rtx;
9292 xops[1] = reg;
9293 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9294 fputs ("\tje\t", asm_out_file);
9295 assemble_name_raw (asm_out_file, end_lab);
9296 fputc ('\n', asm_out_file);
9297
9298 /* SP = SP + PROBE_INTERVAL. */
9299 xops[1] = GEN_INT (PROBE_INTERVAL);
9300 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9301
9302 /* Probe at SP. */
9303 xops[1] = const0_rtx;
9304 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9305
9306 fprintf (asm_out_file, "\tjmp\t");
9307 assemble_name_raw (asm_out_file, loop_lab);
9308 fputc ('\n', asm_out_file);
9309
9310 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9311
9312 return "";
9313 }
9314
9315 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9316 inclusive. These are offsets from the current stack pointer. */
9317
9318 static void
9319 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9320 {
9321 /* See if we have a constant small number of probes to generate. If so,
9322 that's the easy case. The run-time loop is made up of 7 insns in the
9323 generic case while the compile-time loop is made up of n insns for n #
9324 of intervals. */
9325 if (size <= 7 * PROBE_INTERVAL)
9326 {
9327 HOST_WIDE_INT i;
9328
9329 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9330 it exceeds SIZE. If only one probe is needed, this will not
9331 generate any code. Then probe at FIRST + SIZE. */
9332 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9333 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9334
9335 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9336 }
9337
9338 /* Otherwise, do the same as above, but in a loop. Note that we must be
9339 extra careful with variables wrapping around because we might be at
9340 the very top (or the very bottom) of the address space and we have
9341 to be able to handle this case properly; in particular, we use an
9342 equality test for the loop condition. */
9343 else
9344 {
9345 HOST_WIDE_INT rounded_size, last;
9346 struct scratch_reg sr;
9347
9348 get_scratch_register_on_entry (&sr);
9349
9350
9351 /* Step 1: round SIZE to the previous multiple of the interval. */
9352
9353 rounded_size = size & -PROBE_INTERVAL;
9354
9355
9356 /* Step 2: compute initial and final value of the loop counter. */
9357
9358 /* TEST_OFFSET = FIRST. */
9359 emit_move_insn (sr.reg, GEN_INT (-first));
9360
9361 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9362 last = first + rounded_size;
9363
9364
9365 /* Step 3: the loop
9366
9367 while (TEST_ADDR != LAST_ADDR)
9368 {
9369 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9370 probe at TEST_ADDR
9371 }
9372
9373 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9374 until it is equal to ROUNDED_SIZE. */
9375
9376 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9377
9378
9379 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9380 that SIZE is equal to ROUNDED_SIZE. */
9381
9382 if (size != rounded_size)
9383 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9384 stack_pointer_rtx,
9385 sr.reg),
9386 rounded_size - size));
9387
9388 release_scratch_register_on_entry (&sr);
9389 }
9390
9391 /* Make sure nothing is scheduled before we are done. */
9392 emit_insn (gen_blockage ());
9393 }
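
/* Worked example (illustrative, hypothetical numbers): with FIRST == 4096,
   SIZE == 40000 and a 4096-byte PROBE_INTERVAL, rounded_size is
   40000 & -4096 == 36864, the loop probes at offsets 8192, 12288, ...,
   40960 below the incoming stack pointer (FIRST + N * interval), and since
   SIZE != rounded_size a trailing probe lands at 44096 == FIRST + SIZE.  */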
9394
9395 /* Probe a range of stack addresses from REG to END, inclusive. These are
9396 offsets from the current stack pointer. */
9397
9398 const char *
9399 output_probe_stack_range (rtx reg, rtx end)
9400 {
9401 static int labelno = 0;
9402 char loop_lab[32], end_lab[32];
9403 rtx xops[3];
9404
9405 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9406 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9407
9408 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9409
9410 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9411 xops[0] = reg;
9412 xops[1] = end;
9413 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9414 fputs ("\tje\t", asm_out_file);
9415 assemble_name_raw (asm_out_file, end_lab);
9416 fputc ('\n', asm_out_file);
9417
9418 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9419 xops[1] = GEN_INT (PROBE_INTERVAL);
9420 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9421
9422 /* Probe at TEST_ADDR. */
9423 xops[0] = stack_pointer_rtx;
9424 xops[1] = reg;
9425 xops[2] = const0_rtx;
9426 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9427
9428 fprintf (asm_out_file, "\tjmp\t");
9429 assemble_name_raw (asm_out_file, loop_lab);
9430 fputc ('\n', asm_out_file);
9431
9432 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9433
9434 return "";
9435 }
9436
9437 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
9438 to be generated in correct form. */
9439 static void
9440 ix86_finalize_stack_realign_flags (void)
9441 {
9442 /* Check if stack realignment is really needed after reload, and
9443 store the result in cfun. */
9444 unsigned int incoming_stack_boundary
9445 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9446 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9447 unsigned int stack_realign = (incoming_stack_boundary
9448 < (current_function_is_leaf
9449 ? crtl->max_used_stack_slot_alignment
9450 : crtl->stack_alignment_needed));
9451
9452 if (crtl->stack_realign_finalized)
9453 {
9454 /* After stack_realign_needed is finalized, we can no longer
9455 change it. */
9456 gcc_assert (crtl->stack_realign_needed == stack_realign);
9457 }
9458 else
9459 {
9460 crtl->stack_realign_needed = stack_realign;
9461 crtl->stack_realign_finalized = true;
9462 }
9463 }
9464
9465 /* Expand the prologue into a bunch of separate insns. */
9466
9467 void
9468 ix86_expand_prologue (void)
9469 {
9470 struct machine_function *m = cfun->machine;
9471 rtx insn, t;
9472 bool pic_reg_used;
9473 struct ix86_frame frame;
9474 HOST_WIDE_INT allocate;
9475 bool int_registers_saved;
9476
9477 ix86_finalize_stack_realign_flags ();
9478
9479 /* DRAP should not coexist with stack_realign_fp */
9480 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9481
9482 memset (&m->fs, 0, sizeof (m->fs));
9483
9484 /* Initialize CFA state for before the prologue. */
9485 m->fs.cfa_reg = stack_pointer_rtx;
9486 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9487
9488 /* Track SP offset to the CFA. We continue tracking this after we've
9489 swapped the CFA register away from SP. In the case of re-alignment
9490 this is fudged; we're interested in offsets within the local frame. */
9491 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9492 m->fs.sp_valid = true;
9493
9494 ix86_compute_frame_layout (&frame);
9495
9496 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9497 {
9498 /* We should have already generated an error for any use of
9499 ms_hook on a nested function. */
9500 gcc_checking_assert (!ix86_static_chain_on_stack);
9501
9502 /* Check if profiling is active and we shall use the profiling-before-prologue
9503 variant. If so, sorry. */
9504 if (crtl->profile && flag_fentry != 0)
9505 sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
9506
9507 /* In ix86_asm_output_function_label we emitted:
9508 8b ff movl.s %edi,%edi
9509 55 push %ebp
9510 8b ec movl.s %esp,%ebp
9511
9512 This matches the hookable function prologue in Win32 API
9513 functions in Microsoft Windows XP Service Pack 2 and newer.
9514 Wine uses this to enable Windows apps to hook the Win32 API
9515 functions provided by Wine.
9516
9517 What that means is that we've already set up the frame pointer. */
9518
9519 if (frame_pointer_needed
9520 && !(crtl->drap_reg && crtl->stack_realign_needed))
9521 {
9522 rtx push, mov;
9523
9524 /* We've decided to use the frame pointer already set up.
9525 Describe this to the unwinder by pretending that both
9526 push and mov insns happen right here.
9527
9528 Putting the unwind info here at the end of the ms_hook
9529 is done so that we can make absolutely certain we get
9530 the required byte sequence at the start of the function,
9531 rather than relying on an assembler that can produce
9532 the exact encoding required.
9533
9534 However it does mean (in the unpatched case) that we have
9535 a 1 insn window where the asynchronous unwind info is
9536 incorrect. However, if we placed the unwind info at
9537 its correct location we would have incorrect unwind info
9538 in the patched case. Which is probably all moot since
9539 I don't expect Wine generates dwarf2 unwind info for the
9540 system libraries that use this feature. */
9541
9542 insn = emit_insn (gen_blockage ());
9543
9544 push = gen_push (hard_frame_pointer_rtx);
9545 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9546 stack_pointer_rtx);
9547 RTX_FRAME_RELATED_P (push) = 1;
9548 RTX_FRAME_RELATED_P (mov) = 1;
9549
9550 RTX_FRAME_RELATED_P (insn) = 1;
9551 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9552 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9553
9554 /* Note that gen_push incremented m->fs.cfa_offset, even
9555 though we didn't emit the push insn here. */
9556 m->fs.cfa_reg = hard_frame_pointer_rtx;
9557 m->fs.fp_offset = m->fs.cfa_offset;
9558 m->fs.fp_valid = true;
9559 }
9560 else
9561 {
9562 /* The frame pointer is not needed so pop %ebp again.
9563 This leaves us with a pristine state. */
9564 emit_insn (gen_pop (hard_frame_pointer_rtx));
9565 }
9566 }
9567
9568 /* The first insn of a function that accepts its static chain on the
9569 stack is to push the register that would be filled in by a direct
9570 call. This insn will be skipped by the trampoline. */
9571 else if (ix86_static_chain_on_stack)
9572 {
9573 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9574 emit_insn (gen_blockage ());
9575
9576 /* We don't want to interpret this push insn as a register save,
9577 only as a stack adjustment. The real copy of the register as
9578 a save will be done later, if needed. */
9579 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9580 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9581 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9582 RTX_FRAME_RELATED_P (insn) = 1;
9583 }
9584
9585 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9586 DRAP is needed and stack realignment is really needed after reload. */
9587 if (stack_realign_drap)
9588 {
9589 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9590
9591 /* Only need to push parameter pointer reg if it is caller saved. */
9592 if (!call_used_regs[REGNO (crtl->drap_reg)])
9593 {
9594 /* Push arg pointer reg */
9595 insn = emit_insn (gen_push (crtl->drap_reg));
9596 RTX_FRAME_RELATED_P (insn) = 1;
9597 }
9598
9599 /* Grab the argument pointer. */
9600 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9601 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9602 RTX_FRAME_RELATED_P (insn) = 1;
9603 m->fs.cfa_reg = crtl->drap_reg;
9604 m->fs.cfa_offset = 0;
9605
9606 /* Align the stack. */
9607 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9608 stack_pointer_rtx,
9609 GEN_INT (-align_bytes)));
9610 RTX_FRAME_RELATED_P (insn) = 1;
9611
9612 /* Replicate the return address on the stack so that the return
9613 address can be reached via the (argp - 1) slot. This is needed
9614 to implement macro RETURN_ADDR_RTX and intrinsic function
9615 expand_builtin_return_addr etc. */
9616 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9617 t = gen_frame_mem (Pmode, t);
9618 insn = emit_insn (gen_push (t));
9619 RTX_FRAME_RELATED_P (insn) = 1;
9620
9621 /* For the purposes of frame and register save area addressing,
9622 we've started over with a new frame. */
9623 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9624 m->fs.realigned = true;
9625 }
9626
9627 if (frame_pointer_needed && !m->fs.fp_valid)
9628 {
9629 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9630 slower on all targets. Also sdb doesn't like it. */
9631 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9632 RTX_FRAME_RELATED_P (insn) = 1;
9633
9634 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9635 RTX_FRAME_RELATED_P (insn) = 1;
9636
9637 if (m->fs.cfa_reg == stack_pointer_rtx)
9638 m->fs.cfa_reg = hard_frame_pointer_rtx;
9639 gcc_assert (m->fs.sp_offset == frame.hard_frame_pointer_offset);
9640 m->fs.fp_offset = m->fs.sp_offset;
9641 m->fs.fp_valid = true;
9642 }
9643
9644 int_registers_saved = (frame.nregs == 0);
9645
9646 if (!int_registers_saved)
9647 {
9648 /* If saving registers via PUSH, do so now. */
9649 if (!frame.save_regs_using_mov)
9650 {
9651 ix86_emit_save_regs ();
9652 int_registers_saved = true;
9653 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9654 }
9655
9656 /* When using red zone we may start register saving before allocating
9657 the stack frame, saving one cycle of the prologue. However, avoid
9658 doing this if we have to probe the stack; at least on x86_64 the
9659 stack probe can turn into a call that clobbers a red zone location. */
9660 else if (ix86_using_red_zone ()
9661 && (! TARGET_STACK_PROBE
9662 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9663 {
9664 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9665 int_registers_saved = true;
9666 }
9667 }
9668
9669 if (stack_realign_fp)
9670 {
9671 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9672 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9673
9674 /* The computation of the size of the re-aligned stack frame means
9675 that we must allocate the size of the register save area before
9676 performing the actual alignment. Otherwise we cannot guarantee
9677 that there's enough storage above the realignment point. */
9678 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9679 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9680 GEN_INT (m->fs.sp_offset
9681 - frame.sse_reg_save_offset),
9682 -1, false);
9683
9684 /* Align the stack. */
9685 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9686 stack_pointer_rtx,
9687 GEN_INT (-align_bytes)));
9688
9689 /* For the purposes of register save area addressing, the stack
9690 pointer is no longer valid. As for the value of sp_offset,
9691 see ix86_compute_frame_layout, which we need to match in order
9692 to pass verification of stack_pointer_offset at the end. */
9693 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
9694 m->fs.sp_valid = false;
9695 }
9696
9697 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9698
9699 if (flag_stack_usage)
9700 {
9701 /* We start to count from ARG_POINTER. */
9702 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9703
9704 /* If it was realigned, take into account the fake frame. */
9705 if (stack_realign_drap)
9706 {
9707 if (ix86_static_chain_on_stack)
9708 stack_size += UNITS_PER_WORD;
9709
9710 if (!call_used_regs[REGNO (crtl->drap_reg)])
9711 stack_size += UNITS_PER_WORD;
9712
9713 /* This over-estimates by 1 minimal-stack-alignment-unit but
9714 mitigates that by counting in the new return address slot. */
9715 current_function_dynamic_stack_size
9716 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9717 }
9718
9719 current_function_static_stack_size = stack_size;
9720 }
9721
9722 /* The stack has already been decremented by the instruction calling us
9723 so we need to probe unconditionally to preserve the protection area. */
9724 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9725 {
9726 /* We expect the registers to be saved when probes are used. */
9727 gcc_assert (int_registers_saved);
9728
9729 if (STACK_CHECK_MOVING_SP)
9730 {
9731 ix86_adjust_stack_and_probe (allocate);
9732 allocate = 0;
9733 }
9734 else
9735 {
9736 HOST_WIDE_INT size = allocate;
9737
9738 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9739 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9740
9741 if (TARGET_STACK_PROBE)
9742 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9743 else
9744 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9745 }
9746 }
9747
9748 if (allocate == 0)
9749 ;
9750 else if (!ix86_target_stack_probe ()
9751 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9752 {
9753 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9754 GEN_INT (-allocate), -1,
9755 m->fs.cfa_reg == stack_pointer_rtx);
9756 }
9757 else
9758 {
9759 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9760 rtx r10 = NULL;
9761 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
9762
9763 bool eax_live = false;
9764 bool r10_live = false;
9765
9766 if (TARGET_64BIT)
9767 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
9768 if (!TARGET_64BIT_MS_ABI)
9769 eax_live = ix86_eax_live_at_start_p ();
9770
9771 if (eax_live)
9772 {
9773 emit_insn (gen_push (eax));
9774 allocate -= UNITS_PER_WORD;
9775 }
9776 if (r10_live)
9777 {
9778 r10 = gen_rtx_REG (Pmode, R10_REG);
9779 emit_insn (gen_push (r10));
9780 allocate -= UNITS_PER_WORD;
9781 }
9782
9783 emit_move_insn (eax, GEN_INT (allocate));
9784 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9785
9786 /* Use the fact that AX still contains ALLOCATE. */
9787 adjust_stack_insn = (TARGET_64BIT
9788 ? gen_pro_epilogue_adjust_stack_di_sub
9789 : gen_pro_epilogue_adjust_stack_si_sub);
9790
9791 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
9792 stack_pointer_rtx, eax));
9793
9794 if (m->fs.cfa_reg == stack_pointer_rtx)
9795 {
9796 m->fs.cfa_offset += allocate;
9797
9798 RTX_FRAME_RELATED_P (insn) = 1;
9799 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9800 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9801 plus_constant (stack_pointer_rtx,
9802 -allocate)));
9803 }
9804 m->fs.sp_offset += allocate;
9805
9806 if (r10_live && eax_live)
9807 {
9808 t = choose_baseaddr (m->fs.sp_offset - allocate);
9809 emit_move_insn (r10, gen_frame_mem (Pmode, t));
9810 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
9811 emit_move_insn (eax, gen_frame_mem (Pmode, t));
9812 }
9813 else if (eax_live || r10_live)
9814 {
9815 t = choose_baseaddr (m->fs.sp_offset - allocate);
9816 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
9817 }
9818 }
9819 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9820
9821 if (!int_registers_saved)
9822 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9823 if (frame.nsseregs)
9824 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9825
9826 pic_reg_used = false;
9827 if (pic_offset_table_rtx
9828 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9829 || crtl->profile))
9830 {
9831 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9832
9833 if (alt_pic_reg_used != INVALID_REGNUM)
9834 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9835
9836 pic_reg_used = true;
9837 }
9838
9839 if (pic_reg_used)
9840 {
9841 if (TARGET_64BIT)
9842 {
9843 if (ix86_cmodel == CM_LARGE_PIC)
9844 {
9845 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9846 rtx label = gen_label_rtx ();
9847 emit_label (label);
9848 LABEL_PRESERVE_P (label) = 1;
9849 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9850 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9851 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9852 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9853 pic_offset_table_rtx, tmp_reg));
9854 }
9855 else
9856 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9857 }
9858 else
9859 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
9860 }
9861
9862 /* In the pic_reg_used case, make sure that the got load isn't deleted
9863 when mcount needs it. Blockage to avoid call movement across mcount
9864 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
9865 note. */
9866 if (crtl->profile && !flag_fentry && pic_reg_used)
9867 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9868
9869 if (crtl->drap_reg && !crtl->stack_realign_needed)
9870 {
9871 /* vDRAP is set up, but after reload it turns out stack realignment
9872 isn't necessary; here we emit prologue code to set up DRAP
9873 without the stack realignment adjustment. */
9874 t = choose_baseaddr (0);
9875 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9876 }
9877
9878 /* Prevent instructions from being scheduled into the register save push
9879 sequence when access to the red-zone area is done through the frame pointer.
9880 The offset between the frame pointer and the stack pointer is calculated
9881 relative to the value of the stack pointer at the end of the function
9882 prologue, and moving instructions that access redzone area via frame
9883 pointer inside push sequence violates this assumption. */
9884 if (frame_pointer_needed && frame.red_zone_size)
9885 emit_insn (gen_memory_blockage ());
9886
9887 /* Emit cld instruction if stringops are used in the function. */
9888 if (TARGET_CLD && ix86_current_function_needs_cld)
9889 emit_insn (gen_cld ());
9890 }
9891
9892 /* Emit code to restore REG using a POP insn. */
9893
9894 static void
9895 ix86_emit_restore_reg_using_pop (rtx reg)
9896 {
9897 struct machine_function *m = cfun->machine;
9898 rtx insn = emit_insn (gen_pop (reg));
9899
9900 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9901 m->fs.sp_offset -= UNITS_PER_WORD;
9902
9903 if (m->fs.cfa_reg == crtl->drap_reg
9904 && REGNO (reg) == REGNO (crtl->drap_reg))
9905 {
9906 /* Previously we'd represented the CFA as an expression
9907 like *(%ebp - 8). We've just popped that value from
9908 the stack, which means we need to reset the CFA to
9909 the drap register. This will remain until we restore
9910 the stack pointer. */
9911 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9912 RTX_FRAME_RELATED_P (insn) = 1;
9913
9914 /* This means that the DRAP register is valid for addressing too. */
9915 m->fs.drap_valid = true;
9916 return;
9917 }
9918
9919 if (m->fs.cfa_reg == stack_pointer_rtx)
9920 {
9921 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9922 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9923 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9924 RTX_FRAME_RELATED_P (insn) = 1;
9925
9926 m->fs.cfa_offset -= UNITS_PER_WORD;
9927 }
9928
9929 /* When the frame pointer is the CFA, and we pop it, we are
9930 swapping back to the stack pointer as the CFA. This happens
9931 for stack frames that don't allocate other data, so we assume
9932 the stack pointer is now pointing at the return address, i.e.
9933 the function entry state, which makes the offset be 1 word. */
9934 if (reg == hard_frame_pointer_rtx)
9935 {
9936 m->fs.fp_valid = false;
9937 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9938 {
9939 m->fs.cfa_reg = stack_pointer_rtx;
9940 m->fs.cfa_offset -= UNITS_PER_WORD;
9941
9942 add_reg_note (insn, REG_CFA_DEF_CFA,
9943 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9944 GEN_INT (m->fs.cfa_offset)));
9945 RTX_FRAME_RELATED_P (insn) = 1;
9946 }
9947 }
9948 }
9949
9950 /* Emit code to restore saved registers using POP insns. */
9951
9952 static void
9953 ix86_emit_restore_regs_using_pop (void)
9954 {
9955 unsigned int regno;
9956
9957 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9958 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9959 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
9960 }
9961
9962 /* Emit code and notes for the LEAVE instruction. */
9963
9964 static void
9965 ix86_emit_leave (void)
9966 {
9967 struct machine_function *m = cfun->machine;
9968 rtx insn = emit_insn (ix86_gen_leave ());
9969
9970 ix86_add_queued_cfa_restore_notes (insn);
9971
9972 gcc_assert (m->fs.fp_valid);
9973 m->fs.sp_valid = true;
9974 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9975 m->fs.fp_valid = false;
9976
9977 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9978 {
9979 m->fs.cfa_reg = stack_pointer_rtx;
9980 m->fs.cfa_offset = m->fs.sp_offset;
9981
9982 add_reg_note (insn, REG_CFA_DEF_CFA,
9983 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
9984 RTX_FRAME_RELATED_P (insn) = 1;
9985 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9986 m->fs.fp_offset);
9987 }
9988 }
9989
9990 /* Emit code to restore saved registers using MOV insns.
9991 First register is restored from CFA - CFA_OFFSET. */
9992 static void
9993 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9994 int maybe_eh_return)
9995 {
9996 struct machine_function *m = cfun->machine;
9997 unsigned int regno;
9998
9999 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10000 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10001 {
10002 rtx reg = gen_rtx_REG (Pmode, regno);
10003 rtx insn, mem;
10004
10005 mem = choose_baseaddr (cfa_offset);
10006 mem = gen_frame_mem (Pmode, mem);
10007 insn = emit_move_insn (reg, mem);
10008
10009 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10010 {
10011 /* Previously we'd represented the CFA as an expression
10012 like *(%ebp - 8). We've just popped that value from
10013 the stack, which means we need to reset the CFA to
10014 the drap register. This will remain until we restore
10015 the stack pointer. */
10016 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10017 RTX_FRAME_RELATED_P (insn) = 1;
10018
10019 /* This means that the DRAP register is valid for addressing. */
10020 m->fs.drap_valid = true;
10021 }
10022 else
10023 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10024
10025 cfa_offset -= UNITS_PER_WORD;
10026 }
10027 }
10028
10029 /* Emit code to restore saved SSE registers using MOV insns.
10030 First register is restored from CFA - CFA_OFFSET. */
10031 static void
10032 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10033 int maybe_eh_return)
10034 {
10035 unsigned int regno;
10036
10037 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10038 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10039 {
10040 rtx reg = gen_rtx_REG (V4SFmode, regno);
10041 rtx mem;
10042
10043 mem = choose_baseaddr (cfa_offset);
10044 mem = gen_rtx_MEM (V4SFmode, mem);
10045 set_mem_align (mem, 128);
10046 emit_move_insn (reg, mem);
10047
10048 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10049
10050 cfa_offset -= 16;
10051 }
10052 }
10053
10054 /* Restore function stack, frame, and registers. */
10055
10056 void
10057 ix86_expand_epilogue (int style)
10058 {
10059 struct machine_function *m = cfun->machine;
10060 struct machine_frame_state frame_state_save = m->fs;
10061 struct ix86_frame frame;
10062 bool restore_regs_via_mov;
10063 bool using_drap;
10064
10065 ix86_finalize_stack_realign_flags ();
10066 ix86_compute_frame_layout (&frame);
10067
10068 m->fs.sp_valid = (!frame_pointer_needed
10069 || (current_function_sp_is_unchanging
10070 && !stack_realign_fp));
10071 gcc_assert (!m->fs.sp_valid
10072 || m->fs.sp_offset == frame.stack_pointer_offset);
10073
10074 /* The FP must be valid if the frame pointer is present. */
10075 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10076 gcc_assert (!m->fs.fp_valid
10077 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10078
10079 /* We must have *some* valid pointer to the stack frame. */
10080 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10081
10082 /* The DRAP is never valid at this point. */
10083 gcc_assert (!m->fs.drap_valid);
10084
10085 /* See the comment about red zone and frame
10086 pointer usage in ix86_expand_prologue. */
10087 if (frame_pointer_needed && frame.red_zone_size)
10088 emit_insn (gen_memory_blockage ());
10089
10090 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10091 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10092
10093 /* Determine the CFA offset of the end of the red-zone. */
10094 m->fs.red_zone_offset = 0;
10095 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10096 {
10097 /* The red-zone begins below the return address. */
10098 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10099
10100 /* When the register save area is in the aligned portion of
10101 the stack, determine the maximum runtime displacement that
10102 matches up with the aligned frame. */
10103 if (stack_realign_drap)
10104 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10105 + UNITS_PER_WORD);
10106 }
10107
10108 /* Special care must be taken for the normal return case of a function
10109 using eh_return: the eax and edx registers are marked as saved, but
10110 not restored along this path. Adjust the save location to match. */
10111 if (crtl->calls_eh_return && style != 2)
10112 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10113
10114 /* If we're only restoring one register and sp is not valid, then
10115 use a move instruction to restore the register, since it's
10116 less work than reloading sp and popping the register. */
10117 if (!m->fs.sp_valid && frame.nregs <= 1)
10118 restore_regs_via_mov = true;
10119 /* EH_RETURN requires the use of moves to function properly. */
10120 else if (crtl->calls_eh_return)
10121 restore_regs_via_mov = true;
10122 else if (TARGET_EPILOGUE_USING_MOVE
10123 && cfun->machine->use_fast_prologue_epilogue
10124 && (frame.nregs > 1
10125 || m->fs.sp_offset != frame.reg_save_offset))
10126 restore_regs_via_mov = true;
10127 else if (frame_pointer_needed
10128 && !frame.nregs
10129 && m->fs.sp_offset != frame.reg_save_offset)
10130 restore_regs_via_mov = true;
10131 else if (frame_pointer_needed
10132 && TARGET_USE_LEAVE
10133 && cfun->machine->use_fast_prologue_epilogue
10134 && frame.nregs == 1)
10135 restore_regs_via_mov = true;
10136 else
10137 restore_regs_via_mov = false;
10138
10139 if (restore_regs_via_mov || frame.nsseregs)
10140 {
10141 /* Ensure that the entire register save area is addressable via
10142 the stack pointer, if we will restore via sp. */
10143 if (TARGET_64BIT
10144 && m->fs.sp_offset > 0x7fffffff
10145 && !(m->fs.fp_valid || m->fs.drap_valid)
10146 && (frame.nsseregs + frame.nregs) != 0)
10147 {
10148 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10149 GEN_INT (m->fs.sp_offset
10150 - frame.sse_reg_save_offset),
10151 style,
10152 m->fs.cfa_reg == stack_pointer_rtx);
10153 }
10154 }
10155
10156 /* If there are any SSE registers to restore, then we have to do it
10157 via moves, since there's obviously no pop for SSE regs. */
10158 if (frame.nsseregs)
10159 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10160 style == 2);
10161
10162 if (restore_regs_via_mov)
10163 {
10164 rtx t;
10165
10166 if (frame.nregs)
10167 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10168
10169 /* eh_return epilogues need %ecx added to the stack pointer. */
10170 if (style == 2)
10171 {
10172 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10173
10174 /* Stack align doesn't work with eh_return. */
10175 gcc_assert (!stack_realign_drap);
10176 /* Neither do regparm nested functions. */
10177 gcc_assert (!ix86_static_chain_on_stack);
10178
10179 if (frame_pointer_needed)
10180 {
10181 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10182 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10183 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10184
10185 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10186 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10187
10188 /* Note that we use SA as a temporary CFA, as the return
10189 address is at the proper place relative to it. We
10190 pretend this happens at the FP restore insn because
10191 prior to this insn the FP would be stored at the wrong
10192 offset relative to SA, and after this insn we have no
10193 other reasonable register to use for the CFA. We don't
10194 bother resetting the CFA to the SP for the duration of
10195 the return insn. */
10196 add_reg_note (insn, REG_CFA_DEF_CFA,
10197 plus_constant (sa, UNITS_PER_WORD));
10198 ix86_add_queued_cfa_restore_notes (insn);
10199 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10200 RTX_FRAME_RELATED_P (insn) = 1;
10201
10202 m->fs.cfa_reg = sa;
10203 m->fs.cfa_offset = UNITS_PER_WORD;
10204 m->fs.fp_valid = false;
10205
10206 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10207 const0_rtx, style, false);
10208 }
10209 else
10210 {
10211 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10212 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10213 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10214 ix86_add_queued_cfa_restore_notes (insn);
10215
10216 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10217 if (m->fs.cfa_offset != UNITS_PER_WORD)
10218 {
10219 m->fs.cfa_offset = UNITS_PER_WORD;
10220 add_reg_note (insn, REG_CFA_DEF_CFA,
10221 plus_constant (stack_pointer_rtx,
10222 UNITS_PER_WORD));
10223 RTX_FRAME_RELATED_P (insn) = 1;
10224 }
10225 }
10226 m->fs.sp_offset = UNITS_PER_WORD;
10227 m->fs.sp_valid = true;
10228 }
10229 }
10230 else
10231 {
10232 /* First step is to deallocate the stack frame so that we can
10233 pop the registers. */
10234 if (!m->fs.sp_valid)
10235 {
10236 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10237 GEN_INT (m->fs.fp_offset
10238 - frame.reg_save_offset),
10239 style, false);
10240 }
10241 else if (m->fs.sp_offset != frame.reg_save_offset)
10242 {
10243 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10244 GEN_INT (m->fs.sp_offset
10245 - frame.reg_save_offset),
10246 style,
10247 m->fs.cfa_reg == stack_pointer_rtx);
10248 }
10249
10250 ix86_emit_restore_regs_using_pop ();
10251 }
10252
10253 /* If we used a frame pointer and haven't already got rid of it,
10254 then do so now. */
10255 if (m->fs.fp_valid)
10256 {
10257 /* If the stack pointer is valid and pointing at the frame
10258 pointer store address, then we only need a pop. */
10259 if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
10260 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10261 /* Leave results in shorter dependency chains on CPUs that are
10262 able to grok it fast. */
10263 else if (TARGET_USE_LEAVE
10264 || optimize_function_for_size_p (cfun)
10265 || !cfun->machine->use_fast_prologue_epilogue)
10266 ix86_emit_leave ();
10267 else
10268 {
10269 pro_epilogue_adjust_stack (stack_pointer_rtx,
10270 hard_frame_pointer_rtx,
10271 const0_rtx, style, !using_drap);
10272 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10273 }
10274 }
10275
10276 if (using_drap)
10277 {
10278 int param_ptr_offset = UNITS_PER_WORD;
10279 rtx insn;
10280
10281 gcc_assert (stack_realign_drap);
10282
10283 if (ix86_static_chain_on_stack)
10284 param_ptr_offset += UNITS_PER_WORD;
10285 if (!call_used_regs[REGNO (crtl->drap_reg)])
10286 param_ptr_offset += UNITS_PER_WORD;
10287
10288 insn = emit_insn (gen_rtx_SET
10289 (VOIDmode, stack_pointer_rtx,
10290 gen_rtx_PLUS (Pmode,
10291 crtl->drap_reg,
10292 GEN_INT (-param_ptr_offset))));
10293 m->fs.cfa_reg = stack_pointer_rtx;
10294 m->fs.cfa_offset = param_ptr_offset;
10295 m->fs.sp_offset = param_ptr_offset;
10296 m->fs.realigned = false;
10297
10298 add_reg_note (insn, REG_CFA_DEF_CFA,
10299 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10300 GEN_INT (param_ptr_offset)));
10301 RTX_FRAME_RELATED_P (insn) = 1;
10302
10303 if (!call_used_regs[REGNO (crtl->drap_reg)])
10304 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10305 }
10306
10307 /* At this point the stack pointer must be valid, and we must have
10308 restored all of the registers. We may not have deallocated the
10309 entire stack frame. We've delayed this until now because it may
10310 be possible to merge the local stack deallocation with the
10311 deallocation forced by ix86_static_chain_on_stack. */
10312 gcc_assert (m->fs.sp_valid);
10313 gcc_assert (!m->fs.fp_valid);
10314 gcc_assert (!m->fs.realigned);
10315 if (m->fs.sp_offset != UNITS_PER_WORD)
10316 {
10317 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10318 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10319 style, true);
10320 }
10321
10322 /* Sibcall epilogues don't want a return instruction. */
10323 if (style == 0)
10324 {
10325 m->fs = frame_state_save;
10326 return;
10327 }
10328
10329 if (crtl->args.pops_args && crtl->args.size)
10330 {
10331 rtx popc = GEN_INT (crtl->args.pops_args);
10332
10333 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10334 address, do explicit add, and jump indirectly to the caller. */
10335
10336 if (crtl->args.pops_args >= 65536)
10337 {
10338 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10339 rtx insn;
10340
10341 /* There is no "pascal" calling convention in any 64bit ABI. */
10342 gcc_assert (!TARGET_64BIT);
10343
10344 insn = emit_insn (gen_pop (ecx));
10345 m->fs.cfa_offset -= UNITS_PER_WORD;
10346 m->fs.sp_offset -= UNITS_PER_WORD;
10347
10348 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10349 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10350 add_reg_note (insn, REG_CFA_REGISTER,
10351 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10352 RTX_FRAME_RELATED_P (insn) = 1;
10353
10354 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10355 popc, -1, true);
10356 emit_jump_insn (gen_return_indirect_internal (ecx));
10357 }
10358 else
10359 emit_jump_insn (gen_return_pop_internal (popc));
10360 }
10361 else
10362 emit_jump_insn (gen_return_internal ());
10363
10364 /* Restore the state back to the state from the prologue,
10365 so that it's correct for the next epilogue. */
10366 m->fs = frame_state_save;
10367 }
10368
10369 /* Reset state that emitting the function body may have modified.  */
10370
10371 static void
10372 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10373 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10374 {
10375 if (pic_offset_table_rtx)
10376 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10377 #if TARGET_MACHO
10378 /* Mach-O doesn't support labels at the end of objects, so if
10379 it looks like we might want one, insert a NOP. */
10380 {
10381 rtx insn = get_last_insn ();
10382 while (insn
10383 && NOTE_P (insn)
10384 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10385 insn = PREV_INSN (insn);
10386 if (insn
10387 && (LABEL_P (insn)
10388 || (NOTE_P (insn)
10389 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10390 fputs ("\tnop\n", file);
10391 }
10392 #endif
10393
10394 }
10395
10396 /* Return a scratch register to use in the split stack prologue. The
10397    split stack prologue is used for -fsplit-stack.  It consists of the
10398    first instructions in the function, even before the regular prologue.
10399 The scratch register can be any caller-saved register which is not
10400 used for parameters or for the static chain. */
10401
10402 static unsigned int
10403 split_stack_prologue_scratch_regno (void)
10404 {
10405 if (TARGET_64BIT)
10406 return R11_REG;
10407 else
10408 {
10409 bool is_fastcall;
10410 int regparm;
10411
10412 is_fastcall = (lookup_attribute ("fastcall",
10413 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10414 != NULL);
10415 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10416
10417 if (is_fastcall)
10418 {
10419 if (DECL_STATIC_CHAIN (cfun->decl))
10420 {
10421 sorry ("-fsplit-stack does not support fastcall with "
10422 "nested function");
10423 return INVALID_REGNUM;
10424 }
10425 return AX_REG;
10426 }
10427 else if (regparm < 3)
10428 {
10429 if (!DECL_STATIC_CHAIN (cfun->decl))
10430 return CX_REG;
10431 else
10432 {
10433 if (regparm >= 2)
10434 {
10435 sorry ("-fsplit-stack does not support 2 register "
10436                      "parameters for a nested function");
10437 return INVALID_REGNUM;
10438 }
10439 return DX_REG;
10440 }
10441 }
10442 else
10443 {
10444 /* FIXME: We could make this work by pushing a register
10445 around the addition and comparison. */
10446 sorry ("-fsplit-stack does not support 3 register parameters");
10447 return INVALID_REGNUM;
10448 }
10449 }
10450 }
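
/* A rough summary of the selection above, for illustration only:
   64-bit code always gets %r11; 32-bit fastcall functions get %eax;
   otherwise %ecx is used, or %edx when a static chain already occupies
   %ecx and at most one register parameter is in use.  The remaining
   combinations are rejected with sorry ().  */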
10451
10452 /* A SYMBOL_REF for the function which allocates new stack space for
10453 -fsplit-stack. */
10454
10455 static GTY(()) rtx split_stack_fn;
10456
10457 /* Handle -fsplit-stack. These are the first instructions in the
10458 function, even before the regular prologue. */
10459
10460 void
10461 ix86_expand_split_stack_prologue (void)
10462 {
10463 struct ix86_frame frame;
10464 HOST_WIDE_INT allocate;
10465 int args_size;
10466 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10467 rtx scratch_reg = NULL_RTX;
10468 rtx varargs_label = NULL_RTX;
10469
10470 gcc_assert (flag_split_stack && reload_completed);
10471
10472 ix86_finalize_stack_realign_flags ();
10473 ix86_compute_frame_layout (&frame);
10474 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10475
10476 /* This is the label we will branch to if we have enough stack
10477 space. We expect the basic block reordering pass to reverse this
10478 branch if optimizing, so that we branch in the unlikely case. */
10479 label = gen_label_rtx ();
10480
10481 /* We need to compare the stack pointer minus the frame size with
10482 the stack boundary in the TCB. The stack boundary always gives
10483 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10484 can compare directly. Otherwise we need to do an addition. */
10485
10486 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10487 UNSPEC_STACK_CHECK);
10488 limit = gen_rtx_CONST (Pmode, limit);
10489 limit = gen_rtx_MEM (Pmode, limit);
10490 if (allocate < SPLIT_STACK_AVAILABLE)
10491 current = stack_pointer_rtx;
10492 else
10493 {
10494 unsigned int scratch_regno;
10495 rtx offset;
10496
10497 /* We need a scratch register to hold the stack pointer minus
10498 the required frame size. Since this is the very start of the
10499 function, the scratch register can be any caller-saved
10500 register which is not used for parameters. */
10501 offset = GEN_INT (- allocate);
10502 scratch_regno = split_stack_prologue_scratch_regno ();
10503 if (scratch_regno == INVALID_REGNUM)
10504 return;
10505 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10506 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10507 {
10508 /* We don't use ix86_gen_add3 in this case because it will
10509          want to split it into a lea, but when not optimizing the insn
10510 will not be split after this point. */
10511 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10512 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10513 offset)));
10514 }
10515 else
10516 {
10517 emit_move_insn (scratch_reg, offset);
10518 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10519 stack_pointer_rtx));
10520 }
10521 current = scratch_reg;
10522 }
10523
10524 ix86_expand_branch (GEU, current, limit, label);
10525 jump_insn = get_last_insn ();
10526 JUMP_LABEL (jump_insn) = label;
10527
10528 /* Mark the jump as very likely to be taken. */
10529 add_reg_note (jump_insn, REG_BR_PROB,
10530 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
10531
10532 /* Get more stack space. We pass in the desired stack space and the
10533 size of the arguments to copy to the new stack. In 32-bit mode
10534 we push the parameters; __morestack will return on a new stack
10535 anyhow. In 64-bit mode we pass the parameters in r10 and
10536 r11. */
10537 allocate_rtx = GEN_INT (allocate);
10538 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10539 call_fusage = NULL_RTX;
10540 if (TARGET_64BIT)
10541 {
10542 rtx reg;
10543
10544 reg = gen_rtx_REG (Pmode, R10_REG);
10545
10546 /* If this function uses a static chain, it will be in %r10.
10547 Preserve it across the call to __morestack. */
10548 if (DECL_STATIC_CHAIN (cfun->decl))
10549 {
10550 rtx rax;
10551
10552 rax = gen_rtx_REG (Pmode, AX_REG);
10553 emit_move_insn (rax, reg);
10554 use_reg (&call_fusage, rax);
10555 }
10556
10557 emit_move_insn (reg, allocate_rtx);
10558 use_reg (&call_fusage, reg);
10559 reg = gen_rtx_REG (Pmode, R11_REG);
10560 emit_move_insn (reg, GEN_INT (args_size));
10561 use_reg (&call_fusage, reg);
10562 }
10563 else
10564 {
10565 emit_insn (gen_push (GEN_INT (args_size)));
10566 emit_insn (gen_push (allocate_rtx));
10567 }
10568 if (split_stack_fn == NULL_RTX)
10569 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10570 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, split_stack_fn),
10571 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10572 NULL_RTX, 0);
10573 add_function_usage_to (call_insn, call_fusage);
10574
10575 /* In order to make call/return prediction work right, we now need
10576 to execute a return instruction. See
10577 libgcc/config/i386/morestack.S for the details on how this works.
10578
10579 For flow purposes gcc must not see this as a return
10580 instruction--we need control flow to continue at the subsequent
10581 label. Therefore, we use an unspec. */
10582 gcc_assert (crtl->args.pops_args < 65536);
10583 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
10584
10585 /* If we are in 64-bit mode and this function uses a static chain,
10586      we saved %r10 in %rax before calling __morestack.  */
10587 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10588 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
10589 gen_rtx_REG (Pmode, AX_REG));
10590
10591 /* If this function calls va_start, we need to store a pointer to
10592 the arguments on the old stack, because they may not have been
10593 all copied to the new stack. At this point the old stack can be
10594 found at the frame pointer value used by __morestack, because
10595 __morestack has set that up before calling back to us. Here we
10596 store that pointer in a scratch register, and in
10597 ix86_expand_prologue we store the scratch register in a stack
10598 slot. */
10599 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10600 {
10601 unsigned int scratch_regno;
10602 rtx frame_reg;
10603 int words;
10604
10605 scratch_regno = split_stack_prologue_scratch_regno ();
10606 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10607 frame_reg = gen_rtx_REG (Pmode, BP_REG);
10608
10609 /* 64-bit:
10610 fp -> old fp value
10611 return address within this function
10612 return address of caller of this function
10613 stack arguments
10614 So we add three words to get to the stack arguments.
10615
10616 32-bit:
10617 fp -> old fp value
10618 return address within this function
10619 first argument to __morestack
10620 second argument to __morestack
10621 return address of caller of this function
10622 stack arguments
10623 So we add five words to get to the stack arguments.
10624 */
10625 words = TARGET_64BIT ? 3 : 5;
10626 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10627 gen_rtx_PLUS (Pmode, frame_reg,
10628 GEN_INT (words * UNITS_PER_WORD))));
10629
10630 varargs_label = gen_label_rtx ();
10631 emit_jump_insn (gen_jump (varargs_label));
10632 JUMP_LABEL (get_last_insn ()) = varargs_label;
10633
10634 emit_barrier ();
10635 }
10636
10637 emit_label (label);
10638 LABEL_NUSES (label) = 1;
10639
10640 /* If this function calls va_start, we now have to set the scratch
10641 register for the case where we do not call __morestack. In this
10642 case we need to set it based on the stack pointer. */
10643 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10644 {
10645 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10646 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10647 GEN_INT (UNITS_PER_WORD))));
10648
10649 emit_label (varargs_label);
10650 LABEL_NUSES (varargs_label) = 1;
10651 }
10652 }
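
/* For illustration only, the code expanded above amounts roughly to:

     compare SP (or SP minus the frame size, computed into a scratch
     register for large frames) against the stack-boundary word in the
     TCB (the UNSPEC_STACK_CHECK memory reference);
     if there is enough space, jump to LABEL;
     otherwise pass the allocation size and incoming argument size to
     __morestack (pushed on 32-bit, in %r10/%r11 on 64-bit), call it,
     and execute the special split-stack return;
   LABEL:
     continue with the regular prologue.  */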
10653
10654 /* We may have to tell the dataflow pass that the split stack prologue
10655 is initializing a scratch register. */
10656
10657 static void
10658 ix86_live_on_entry (bitmap regs)
10659 {
10660 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10661 {
10662 gcc_assert (flag_split_stack);
10663 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
10664 }
10665 }
10666 \f
10667 /* Extract the parts of an RTL expression that is a valid memory address
10668 for an instruction. Return 0 if the structure of the address is
10669    grossly off.  Return -1 if the address contains an ASHIFT, so it is not
10670    strictly valid but is still useful for computing the length of a lea instruction.  */
10671
10672 int
10673 ix86_decompose_address (rtx addr, struct ix86_address *out)
10674 {
10675 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10676 rtx base_reg, index_reg;
10677 HOST_WIDE_INT scale = 1;
10678 rtx scale_rtx = NULL_RTX;
10679 rtx tmp;
10680 int retval = 1;
10681 enum ix86_address_seg seg = SEG_DEFAULT;
10682
10683 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10684 base = addr;
10685 else if (GET_CODE (addr) == PLUS)
10686 {
10687 rtx addends[4], op;
10688 int n = 0, i;
10689
10690 op = addr;
10691 do
10692 {
10693 if (n >= 4)
10694 return 0;
10695 addends[n++] = XEXP (op, 1);
10696 op = XEXP (op, 0);
10697 }
10698 while (GET_CODE (op) == PLUS);
10699 if (n >= 4)
10700 return 0;
10701 addends[n] = op;
10702
10703 for (i = n; i >= 0; --i)
10704 {
10705 op = addends[i];
10706 switch (GET_CODE (op))
10707 {
10708 case MULT:
10709 if (index)
10710 return 0;
10711 index = XEXP (op, 0);
10712 scale_rtx = XEXP (op, 1);
10713 break;
10714
10715 case ASHIFT:
10716 if (index)
10717 return 0;
10718 index = XEXP (op, 0);
10719 tmp = XEXP (op, 1);
10720 if (!CONST_INT_P (tmp))
10721 return 0;
10722 scale = INTVAL (tmp);
10723 if ((unsigned HOST_WIDE_INT) scale > 3)
10724 return 0;
10725 scale = 1 << scale;
10726 break;
10727
10728 case UNSPEC:
10729 if (XINT (op, 1) == UNSPEC_TP
10730 && TARGET_TLS_DIRECT_SEG_REFS
10731 && seg == SEG_DEFAULT)
10732 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10733 else
10734 return 0;
10735 break;
10736
10737 case REG:
10738 case SUBREG:
10739 if (!base)
10740 base = op;
10741 else if (!index)
10742 index = op;
10743 else
10744 return 0;
10745 break;
10746
10747 case CONST:
10748 case CONST_INT:
10749 case SYMBOL_REF:
10750 case LABEL_REF:
10751 if (disp)
10752 return 0;
10753 disp = op;
10754 break;
10755
10756 default:
10757 return 0;
10758 }
10759 }
10760 }
10761 else if (GET_CODE (addr) == MULT)
10762 {
10763 index = XEXP (addr, 0); /* index*scale */
10764 scale_rtx = XEXP (addr, 1);
10765 }
10766 else if (GET_CODE (addr) == ASHIFT)
10767 {
10768 /* We're called for lea too, which implements ashift on occasion. */
10769 index = XEXP (addr, 0);
10770 tmp = XEXP (addr, 1);
10771 if (!CONST_INT_P (tmp))
10772 return 0;
10773 scale = INTVAL (tmp);
10774 if ((unsigned HOST_WIDE_INT) scale > 3)
10775 return 0;
10776 scale = 1 << scale;
10777 retval = -1;
10778 }
10779 else
10780 disp = addr; /* displacement */
10781
10782 /* Extract the integral value of scale. */
10783 if (scale_rtx)
10784 {
10785 if (!CONST_INT_P (scale_rtx))
10786 return 0;
10787 scale = INTVAL (scale_rtx);
10788 }
10789
10790 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10791 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10792
10793 /* Avoid useless 0 displacement. */
10794 if (disp == const0_rtx && (base || index))
10795 disp = NULL_RTX;
10796
10797   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
10798 if (base_reg && index_reg && scale == 1
10799 && (index_reg == arg_pointer_rtx
10800 || index_reg == frame_pointer_rtx
10801 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10802 {
10803 rtx tmp;
10804 tmp = base, base = index, index = tmp;
10805 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
10806 }
10807
10808 /* Special case: %ebp cannot be encoded as a base without a displacement.
10809 Similarly %r13. */
10810 if (!disp
10811 && base_reg
10812 && (base_reg == hard_frame_pointer_rtx
10813 || base_reg == frame_pointer_rtx
10814 || base_reg == arg_pointer_rtx
10815 || (REG_P (base_reg)
10816 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10817 || REGNO (base_reg) == R13_REG))))
10818 disp = const0_rtx;
10819
10820   /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
10821 Avoid this by transforming to [%esi+0].
10822 Reload calls address legitimization without cfun defined, so we need
10823 to test cfun for being non-NULL. */
10824 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10825 && base_reg && !index_reg && !disp
10826 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
10827 disp = const0_rtx;
10828
10829 /* Special case: encode reg+reg instead of reg*2. */
10830 if (!base && index && scale == 2)
10831 base = index, base_reg = index_reg, scale = 1;
10832
10833 /* Special case: scaling cannot be encoded without base or displacement. */
10834 if (!base && !disp && index && scale != 1)
10835 disp = const0_rtx;
10836
10837 out->base = base;
10838 out->index = index;
10839 out->disp = disp;
10840 out->scale = scale;
10841 out->seg = seg;
10842
10843 return retval;
10844 }
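
/* As an illustrative sketch only: the canonical SImode address
   (plus (plus (mult (reg %ecx) (const_int 4)) (reg %ebx)) (const_int 16))
   decomposes into base = %ebx, index = %ecx, scale = 4, disp = 16 and
   seg = SEG_DEFAULT -- i.e. 16(%ebx,%ecx,4) in AT&T syntax -- with a
   return value of 1.  A bare (ashift (reg %ecx) (const_int 2)) address,
   as lea sometimes uses, is also accepted but returns -1.  */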
10845 \f
10846 /* Return cost of the memory address x.
10847 For i386, it is better to use a complex address than let gcc copy
10848 the address into a reg and make a new pseudo. But not if the address
10849    requires two regs -- that would mean more pseudos with longer
10850 lifetimes. */
10851 static int
10852 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10853 {
10854 struct ix86_address parts;
10855 int cost = 1;
10856 int ok = ix86_decompose_address (x, &parts);
10857
10858 gcc_assert (ok);
10859
10860 if (parts.base && GET_CODE (parts.base) == SUBREG)
10861 parts.base = SUBREG_REG (parts.base);
10862 if (parts.index && GET_CODE (parts.index) == SUBREG)
10863 parts.index = SUBREG_REG (parts.index);
10864
10865 /* Attempt to minimize number of registers in the address. */
10866 if ((parts.base
10867 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10868 || (parts.index
10869 && (!REG_P (parts.index)
10870 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10871 cost++;
10872
10873 if (parts.base
10874 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10875 && parts.index
10876 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10877 && parts.base != parts.index)
10878 cost++;
10879
10880   /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10881      since its predecode logic can't detect the length of such instructions
10882      and they degenerate to vector decoding.  Increase the cost of such
10883      addresses here.  The penalty is a minimum of 2 cycles.  It may be
10884      worthwhile to split such addresses or even to refuse them entirely.
10885 
10886      The following addressing modes are affected:
10887 [base+scale*index]
10888 [scale*index+disp]
10889 [base+index]
10890
10891      The first and last cases may be avoidable by explicitly coding the zero
10892      into the memory address, but I don't have an AMD-K6 machine handy to
10893      check this theory.  */
10894
10895 if (TARGET_K6
10896 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10897 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10898 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10899 cost += 10;
10900
10901 return cost;
10902 }
10903 \f
10904 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10905    this is used to form addresses to local data when -fPIC is in
10906 use. */
10907
10908 static bool
10909 darwin_local_data_pic (rtx disp)
10910 {
10911 return (GET_CODE (disp) == UNSPEC
10912 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10913 }
10914
10915 /* Determine if a given RTX is a valid constant. We already know this
10916 satisfies CONSTANT_P. */
10917
10918 bool
10919 legitimate_constant_p (rtx x)
10920 {
10921 switch (GET_CODE (x))
10922 {
10923 case CONST:
10924 x = XEXP (x, 0);
10925
10926 if (GET_CODE (x) == PLUS)
10927 {
10928 if (!CONST_INT_P (XEXP (x, 1)))
10929 return false;
10930 x = XEXP (x, 0);
10931 }
10932
10933 if (TARGET_MACHO && darwin_local_data_pic (x))
10934 return true;
10935
10936 /* Only some unspecs are valid as "constants". */
10937 if (GET_CODE (x) == UNSPEC)
10938 switch (XINT (x, 1))
10939 {
10940 case UNSPEC_GOT:
10941 case UNSPEC_GOTOFF:
10942 case UNSPEC_PLTOFF:
10943 return TARGET_64BIT;
10944 case UNSPEC_TPOFF:
10945 case UNSPEC_NTPOFF:
10946 x = XVECEXP (x, 0, 0);
10947 return (GET_CODE (x) == SYMBOL_REF
10948 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10949 case UNSPEC_DTPOFF:
10950 x = XVECEXP (x, 0, 0);
10951 return (GET_CODE (x) == SYMBOL_REF
10952 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10953 default:
10954 return false;
10955 }
10956
10957 /* We must have drilled down to a symbol. */
10958 if (GET_CODE (x) == LABEL_REF)
10959 return true;
10960 if (GET_CODE (x) != SYMBOL_REF)
10961 return false;
10962 /* FALLTHRU */
10963
10964 case SYMBOL_REF:
10965 /* TLS symbols are never valid. */
10966 if (SYMBOL_REF_TLS_MODEL (x))
10967 return false;
10968
10969 /* DLLIMPORT symbols are never valid. */
10970 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10971 && SYMBOL_REF_DLLIMPORT_P (x))
10972 return false;
10973 break;
10974
10975 case CONST_DOUBLE:
10976 if (GET_MODE (x) == TImode
10977 && x != CONST0_RTX (TImode)
10978 && !TARGET_64BIT)
10979 return false;
10980 break;
10981
10982 case CONST_VECTOR:
10983 if (!standard_sse_constant_p (x))
10984 return false;
10985
10986 default:
10987 break;
10988 }
10989
10990 /* Otherwise we handle everything else in the move patterns. */
10991 return true;
10992 }
10993
10994 /* Determine if it's legal to put X into the constant pool. This
10995 is not possible for the address of thread-local symbols, which
10996 is checked above. */
10997
10998 static bool
10999 ix86_cannot_force_const_mem (rtx x)
11000 {
11001 /* We can always put integral constants and vectors in memory. */
11002 switch (GET_CODE (x))
11003 {
11004 case CONST_INT:
11005 case CONST_DOUBLE:
11006 case CONST_VECTOR:
11007 return false;
11008
11009 default:
11010 break;
11011 }
11012 return !legitimate_constant_p (x);
11013 }
11014
11015
11016 /* Nonzero if the constant value X is a legitimate general operand
11017 when generating PIC code. It is given that flag_pic is on and
11018 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11019
11020 bool
11021 legitimate_pic_operand_p (rtx x)
11022 {
11023 rtx inner;
11024
11025 switch (GET_CODE (x))
11026 {
11027 case CONST:
11028 inner = XEXP (x, 0);
11029 if (GET_CODE (inner) == PLUS
11030 && CONST_INT_P (XEXP (inner, 1)))
11031 inner = XEXP (inner, 0);
11032
11033 /* Only some unspecs are valid as "constants". */
11034 if (GET_CODE (inner) == UNSPEC)
11035 switch (XINT (inner, 1))
11036 {
11037 case UNSPEC_GOT:
11038 case UNSPEC_GOTOFF:
11039 case UNSPEC_PLTOFF:
11040 return TARGET_64BIT;
11041 case UNSPEC_TPOFF:
11042 x = XVECEXP (inner, 0, 0);
11043 return (GET_CODE (x) == SYMBOL_REF
11044 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11045 case UNSPEC_MACHOPIC_OFFSET:
11046 return legitimate_pic_address_disp_p (x);
11047 default:
11048 return false;
11049 }
11050 /* FALLTHRU */
11051
11052 case SYMBOL_REF:
11053 case LABEL_REF:
11054 return legitimate_pic_address_disp_p (x);
11055
11056 default:
11057 return true;
11058 }
11059 }
11060
11061 /* Determine if a given CONST RTX is a valid memory displacement
11062 in PIC mode. */
11063
11064 bool
11065 legitimate_pic_address_disp_p (rtx disp)
11066 {
11067 bool saw_plus;
11068
11069 /* In 64bit mode we can allow direct addresses of symbols and labels
11070 when they are not dynamic symbols. */
11071 if (TARGET_64BIT)
11072 {
11073 rtx op0 = disp, op1;
11074
11075 switch (GET_CODE (disp))
11076 {
11077 case LABEL_REF:
11078 return true;
11079
11080 case CONST:
11081 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11082 break;
11083 op0 = XEXP (XEXP (disp, 0), 0);
11084 op1 = XEXP (XEXP (disp, 0), 1);
11085 if (!CONST_INT_P (op1)
11086 || INTVAL (op1) >= 16*1024*1024
11087 || INTVAL (op1) < -16*1024*1024)
11088 break;
11089 if (GET_CODE (op0) == LABEL_REF)
11090 return true;
11091 if (GET_CODE (op0) != SYMBOL_REF)
11092 break;
11093 /* FALLTHRU */
11094
11095 case SYMBOL_REF:
11096 /* TLS references should always be enclosed in UNSPEC. */
11097 if (SYMBOL_REF_TLS_MODEL (op0))
11098 return false;
11099 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11100 && ix86_cmodel != CM_LARGE_PIC)
11101 return true;
11102 break;
11103
11104 default:
11105 break;
11106 }
11107 }
11108 if (GET_CODE (disp) != CONST)
11109 return false;
11110 disp = XEXP (disp, 0);
11111
11112 if (TARGET_64BIT)
11113 {
11114       /* It is unsafe to allow PLUS expressions.  This limits the allowed
11115          distance of GOT tables.  We should not need these anyway.  */
11116 if (GET_CODE (disp) != UNSPEC
11117 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11118 && XINT (disp, 1) != UNSPEC_GOTOFF
11119 && XINT (disp, 1) != UNSPEC_PLTOFF))
11120 return false;
11121
11122 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11123 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11124 return false;
11125 return true;
11126 }
11127
11128 saw_plus = false;
11129 if (GET_CODE (disp) == PLUS)
11130 {
11131 if (!CONST_INT_P (XEXP (disp, 1)))
11132 return false;
11133 disp = XEXP (disp, 0);
11134 saw_plus = true;
11135 }
11136
11137 if (TARGET_MACHO && darwin_local_data_pic (disp))
11138 return true;
11139
11140 if (GET_CODE (disp) != UNSPEC)
11141 return false;
11142
11143 switch (XINT (disp, 1))
11144 {
11145 case UNSPEC_GOT:
11146 if (saw_plus)
11147 return false;
11148 /* We need to check for both symbols and labels because VxWorks loads
11149 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11150 details. */
11151 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11152 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11153 case UNSPEC_GOTOFF:
11154 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11155          While the ABI also specifies a 32bit relocation, we don't produce it in
11156          the small PIC model at all.  */
11157 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11158 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11159 && !TARGET_64BIT)
11160 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11161 return false;
11162 case UNSPEC_GOTTPOFF:
11163 case UNSPEC_GOTNTPOFF:
11164 case UNSPEC_INDNTPOFF:
11165 if (saw_plus)
11166 return false;
11167 disp = XVECEXP (disp, 0, 0);
11168 return (GET_CODE (disp) == SYMBOL_REF
11169 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11170 case UNSPEC_NTPOFF:
11171 disp = XVECEXP (disp, 0, 0);
11172 return (GET_CODE (disp) == SYMBOL_REF
11173 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11174 case UNSPEC_DTPOFF:
11175 disp = XVECEXP (disp, 0, 0);
11176 return (GET_CODE (disp) == SYMBOL_REF
11177 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11178 }
11179
11180 return false;
11181 }
11182
11183 /* Recognizes RTL expressions that are valid memory addresses for an
11184 instruction. The MODE argument is the machine mode for the MEM
11185 expression that wants to use this address.
11186
11187    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
11188 convert common non-canonical forms to canonical form so that they will
11189 be recognized. */
11190
11191 static bool
11192 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11193 rtx addr, bool strict)
11194 {
11195 struct ix86_address parts;
11196 rtx base, index, disp;
11197 HOST_WIDE_INT scale;
11198
11199 if (ix86_decompose_address (addr, &parts) <= 0)
11200 /* Decomposition failed. */
11201 return false;
11202
11203 base = parts.base;
11204 index = parts.index;
11205 disp = parts.disp;
11206 scale = parts.scale;
11207
11208 /* Validate base register.
11209
11210    Don't allow SUBREGs that span more than a word here.  That can lead to spill
11211    failures when the base is one word out of a two-word structure, which is
11212 represented internally as a DImode int. */
11213
11214 if (base)
11215 {
11216 rtx reg;
11217
11218 if (REG_P (base))
11219 reg = base;
11220 else if (GET_CODE (base) == SUBREG
11221 && REG_P (SUBREG_REG (base))
11222 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
11223 <= UNITS_PER_WORD)
11224 reg = SUBREG_REG (base);
11225 else
11226 /* Base is not a register. */
11227 return false;
11228
11229 if (GET_MODE (base) != Pmode)
11230 /* Base is not in Pmode. */
11231 return false;
11232
11233 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11234 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11235 /* Base is not valid. */
11236 return false;
11237 }
11238
11239 /* Validate index register.
11240
11241    Don't allow SUBREGs that span more than a word here -- same as above.  */
11242
11243 if (index)
11244 {
11245 rtx reg;
11246
11247 if (REG_P (index))
11248 reg = index;
11249 else if (GET_CODE (index) == SUBREG
11250 && REG_P (SUBREG_REG (index))
11251 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
11252 <= UNITS_PER_WORD)
11253 reg = SUBREG_REG (index);
11254 else
11255 /* Index is not a register. */
11256 return false;
11257
11258 if (GET_MODE (index) != Pmode)
11259 /* Index is not in Pmode. */
11260 return false;
11261
11262 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11263 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11264 /* Index is not valid. */
11265 return false;
11266 }
11267
11268 /* Validate scale factor. */
11269 if (scale != 1)
11270 {
11271 if (!index)
11272 /* Scale without index. */
11273 return false;
11274
11275 if (scale != 2 && scale != 4 && scale != 8)
11276 /* Scale is not a valid multiplier. */
11277 return false;
11278 }
11279
11280 /* Validate displacement. */
11281 if (disp)
11282 {
11283 if (GET_CODE (disp) == CONST
11284 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11285 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11286 switch (XINT (XEXP (disp, 0), 1))
11287 {
11288 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
11289            used.  While the ABI also specifies 32bit relocations, we don't produce
11290            them at all and use IP-relative addressing instead.  */
11291 case UNSPEC_GOT:
11292 case UNSPEC_GOTOFF:
11293 gcc_assert (flag_pic);
11294 if (!TARGET_64BIT)
11295 goto is_legitimate_pic;
11296
11297 /* 64bit address unspec. */
11298 return false;
11299
11300 case UNSPEC_GOTPCREL:
11301 gcc_assert (flag_pic);
11302 goto is_legitimate_pic;
11303
11304 case UNSPEC_GOTTPOFF:
11305 case UNSPEC_GOTNTPOFF:
11306 case UNSPEC_INDNTPOFF:
11307 case UNSPEC_NTPOFF:
11308 case UNSPEC_DTPOFF:
11309 break;
11310
11311 case UNSPEC_STACK_CHECK:
11312 gcc_assert (flag_split_stack);
11313 break;
11314
11315 default:
11316 /* Invalid address unspec. */
11317 return false;
11318 }
11319
11320 else if (SYMBOLIC_CONST (disp)
11321 && (flag_pic
11322 || (TARGET_MACHO
11323 #if TARGET_MACHO
11324 && MACHOPIC_INDIRECT
11325 && !machopic_operand_p (disp)
11326 #endif
11327 )))
11328 {
11329
11330 is_legitimate_pic:
11331 if (TARGET_64BIT && (index || base))
11332 {
11333 /* foo@dtpoff(%rX) is ok. */
11334 if (GET_CODE (disp) != CONST
11335 || GET_CODE (XEXP (disp, 0)) != PLUS
11336 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11337 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11338 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11339 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11340 /* Non-constant pic memory reference. */
11341 return false;
11342 }
11343 else if (! legitimate_pic_address_disp_p (disp))
11344 /* Displacement is an invalid pic construct. */
11345 return false;
11346
11347 /* This code used to verify that a symbolic pic displacement
11348 includes the pic_offset_table_rtx register.
11349
11350          While this is a good idea, unfortunately these constructs may
11351          be created by the "adds using lea" optimization for incorrect
11352          code like:
11353
11354 int a;
11355 int foo(int i)
11356 {
11357 return *(&a+i);
11358 }
11359
11360          This code is nonsensical, but results in addressing the
11361          GOT table with the pic_offset_table_rtx base.  We can't
11362          just refuse it easily, since it gets matched by the
11363          "addsi3" pattern, which later gets split into a lea when
11364          the output register differs from the input.  While this
11365          could be handled by a separate addsi pattern for this case
11366          that never results in a lea, disabling this test seems to be
11367          the easier and correct fix for the crash.  */
11368 }
11369 else if (GET_CODE (disp) != LABEL_REF
11370 && !CONST_INT_P (disp)
11371 && (GET_CODE (disp) != CONST
11372 || !legitimate_constant_p (disp))
11373 && (GET_CODE (disp) != SYMBOL_REF
11374 || !legitimate_constant_p (disp)))
11375 /* Displacement is not constant. */
11376 return false;
11377 else if (TARGET_64BIT
11378 && !x86_64_immediate_operand (disp, VOIDmode))
11379 /* Displacement is out of range. */
11380 return false;
11381 }
11382
11383 /* Everything looks valid. */
11384 return true;
11385 }
11386
11387 /* Determine if a given RTX is a valid constant address. */
11388
11389 bool
11390 constant_address_p (rtx x)
11391 {
11392 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11393 }
11394 \f
11395 /* Return a unique alias set for the GOT. */
11396
11397 static alias_set_type
11398 ix86_GOT_alias_set (void)
11399 {
11400 static alias_set_type set = -1;
11401 if (set == -1)
11402 set = new_alias_set ();
11403 return set;
11404 }
11405
11406 /* Return a legitimate reference for ORIG (an address) using the
11407 register REG. If REG is 0, a new pseudo is generated.
11408
11409 There are two types of references that must be handled:
11410
11411 1. Global data references must load the address from the GOT, via
11412 the PIC reg. An insn is emitted to do this load, and the reg is
11413 returned.
11414
11415 2. Static data references, constant pool addresses, and code labels
11416 compute the address as an offset from the GOT, whose base is in
11417 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11418 differentiate them from global data objects. The returned
11419 address is the PIC reg + an unspec constant.
11420
11421 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11422 reg also appears in the address. */
11423
11424 static rtx
11425 legitimize_pic_address (rtx orig, rtx reg)
11426 {
11427 rtx addr = orig;
11428 rtx new_rtx = orig;
11429 rtx base;
11430
11431 #if TARGET_MACHO
11432 if (TARGET_MACHO && !TARGET_64BIT)
11433 {
11434 if (reg == 0)
11435 reg = gen_reg_rtx (Pmode);
11436 /* Use the generic Mach-O PIC machinery. */
11437 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11438 }
11439 #endif
11440
11441 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11442 new_rtx = addr;
11443 else if (TARGET_64BIT
11444 && ix86_cmodel != CM_SMALL_PIC
11445 && gotoff_operand (addr, Pmode))
11446 {
11447 rtx tmpreg;
11448 /* This symbol may be referenced via a displacement from the PIC
11449 base address (@GOTOFF). */
11450
11451 if (reload_in_progress)
11452 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11453 if (GET_CODE (addr) == CONST)
11454 addr = XEXP (addr, 0);
11455 if (GET_CODE (addr) == PLUS)
11456 {
11457 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11458 UNSPEC_GOTOFF);
11459 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11460 }
11461 else
11462 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11463 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11464 if (!reg)
11465 tmpreg = gen_reg_rtx (Pmode);
11466 else
11467 tmpreg = reg;
11468 emit_move_insn (tmpreg, new_rtx);
11469
11470 if (reg != 0)
11471 {
11472 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11473 tmpreg, 1, OPTAB_DIRECT);
11474 new_rtx = reg;
11475 }
11476 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11477 }
11478 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11479 {
11480 /* This symbol may be referenced via a displacement from the PIC
11481 base address (@GOTOFF). */
11482
11483 if (reload_in_progress)
11484 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11485 if (GET_CODE (addr) == CONST)
11486 addr = XEXP (addr, 0);
11487 if (GET_CODE (addr) == PLUS)
11488 {
11489 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11490 UNSPEC_GOTOFF);
11491 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11492 }
11493 else
11494 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11495 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11496 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11497
11498 if (reg != 0)
11499 {
11500 emit_move_insn (reg, new_rtx);
11501 new_rtx = reg;
11502 }
11503 }
11504 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11505 /* We can't use @GOTOFF for text labels on VxWorks;
11506 see gotoff_operand. */
11507 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11508 {
11509 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11510 {
11511 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11512 return legitimize_dllimport_symbol (addr, true);
11513 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11514 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11515 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11516 {
11517 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11518 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11519 }
11520 }
11521
11522 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11523 {
11524 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11525 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11526 new_rtx = gen_const_mem (Pmode, new_rtx);
11527 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11528
11529 if (reg == 0)
11530 reg = gen_reg_rtx (Pmode);
11531       /* Use gen_movsi directly, otherwise the address is loaded
11532          into a register for CSE.  We don't want to CSE this address;
11533          instead we CSE addresses from the GOT table, so skip this.  */
11534 emit_insn (gen_movsi (reg, new_rtx));
11535 new_rtx = reg;
11536 }
11537 else
11538 {
11539 /* This symbol must be referenced via a load from the
11540 Global Offset Table (@GOT). */
11541
11542 if (reload_in_progress)
11543 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11544 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11545 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11546 if (TARGET_64BIT)
11547 new_rtx = force_reg (Pmode, new_rtx);
11548 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11549 new_rtx = gen_const_mem (Pmode, new_rtx);
11550 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11551
11552 if (reg == 0)
11553 reg = gen_reg_rtx (Pmode);
11554 emit_move_insn (reg, new_rtx);
11555 new_rtx = reg;
11556 }
11557 }
11558 else
11559 {
11560 if (CONST_INT_P (addr)
11561 && !x86_64_immediate_operand (addr, VOIDmode))
11562 {
11563 if (reg)
11564 {
11565 emit_move_insn (reg, addr);
11566 new_rtx = reg;
11567 }
11568 else
11569 new_rtx = force_reg (Pmode, addr);
11570 }
11571 else if (GET_CODE (addr) == CONST)
11572 {
11573 addr = XEXP (addr, 0);
11574
11575       /* We must match stuff we generated before.  Assume the only
11576 unspecs that can get here are ours. Not that we could do
11577 anything with them anyway.... */
11578 if (GET_CODE (addr) == UNSPEC
11579 || (GET_CODE (addr) == PLUS
11580 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11581 return orig;
11582 gcc_assert (GET_CODE (addr) == PLUS);
11583 }
11584 if (GET_CODE (addr) == PLUS)
11585 {
11586 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11587
11588 /* Check first to see if this is a constant offset from a @GOTOFF
11589 symbol reference. */
11590 if (gotoff_operand (op0, Pmode)
11591 && CONST_INT_P (op1))
11592 {
11593 if (!TARGET_64BIT)
11594 {
11595 if (reload_in_progress)
11596 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11597 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11598 UNSPEC_GOTOFF);
11599 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11600 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11601 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11602
11603 if (reg != 0)
11604 {
11605 emit_move_insn (reg, new_rtx);
11606 new_rtx = reg;
11607 }
11608 }
11609 else
11610 {
11611 if (INTVAL (op1) < -16*1024*1024
11612 || INTVAL (op1) >= 16*1024*1024)
11613 {
11614 if (!x86_64_immediate_operand (op1, Pmode))
11615 op1 = force_reg (Pmode, op1);
11616 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11617 }
11618 }
11619 }
11620 else
11621 {
11622 base = legitimize_pic_address (XEXP (addr, 0), reg);
11623 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11624 base == reg ? NULL_RTX : reg);
11625
11626 if (CONST_INT_P (new_rtx))
11627 new_rtx = plus_constant (base, INTVAL (new_rtx));
11628 else
11629 {
11630 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11631 {
11632 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11633 new_rtx = XEXP (new_rtx, 1);
11634 }
11635 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
11636 }
11637 }
11638 }
11639 }
11640 return new_rtx;
11641 }
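
/* Illustrative sketch (32-bit, not exhaustive) of the results produced
   above: a global symbol "foo" is turned into a load through the GOT,
     (mem (plus pic_offset_table_rtx (const (unspec [foo] UNSPEC_GOT))))
   i.e. foo@GOT(%ebx), whereas a local symbol or label becomes a simple
   offset from the PIC base,
     (plus pic_offset_table_rtx (const (unspec [foo] UNSPEC_GOTOFF)))
   i.e. foo@GOTOFF(%ebx).  */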
11642 \f
11643 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11644
11645 static rtx
11646 get_thread_pointer (int to_reg)
11647 {
11648 rtx tp, reg, insn;
11649
11650 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11651 if (!to_reg)
11652 return tp;
11653
11654 reg = gen_reg_rtx (Pmode);
11655 insn = gen_rtx_SET (VOIDmode, reg, tp);
11656 insn = emit_insn (insn);
11657
11658 return reg;
11659 }
11660
11661 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11662 false if we expect this to be used for a memory address and true if
11663 we expect to load the address into a register. */
11664
11665 static rtx
11666 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11667 {
11668 rtx dest, base, off, pic, tp;
11669 int type;
11670
11671 switch (model)
11672 {
11673 case TLS_MODEL_GLOBAL_DYNAMIC:
11674 dest = gen_reg_rtx (Pmode);
11675 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11676
11677 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11678 {
11679 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11680
11681 start_sequence ();
11682 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11683 insns = get_insns ();
11684 end_sequence ();
11685
11686 RTL_CONST_CALL_P (insns) = 1;
11687 emit_libcall_block (insns, dest, rax, x);
11688 }
11689 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11690 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11691 else
11692 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11693
11694 if (TARGET_GNU2_TLS)
11695 {
11696 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11697
11698 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11699 }
11700 break;
11701
11702 case TLS_MODEL_LOCAL_DYNAMIC:
11703 base = gen_reg_rtx (Pmode);
11704 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11705
11706 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11707 {
11708 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11709
11710 start_sequence ();
11711 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11712 insns = get_insns ();
11713 end_sequence ();
11714
11715 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11716 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11717 RTL_CONST_CALL_P (insns) = 1;
11718 emit_libcall_block (insns, base, rax, note);
11719 }
11720 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11721 emit_insn (gen_tls_local_dynamic_base_64 (base));
11722 else
11723 emit_insn (gen_tls_local_dynamic_base_32 (base));
11724
11725 if (TARGET_GNU2_TLS)
11726 {
11727 rtx x = ix86_tls_module_base ();
11728
11729 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11730 gen_rtx_MINUS (Pmode, x, tp));
11731 }
11732
11733 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11734 off = gen_rtx_CONST (Pmode, off);
11735
11736 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11737
11738 if (TARGET_GNU2_TLS)
11739 {
11740 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11741
11742 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11743 }
11744
11745 break;
11746
11747 case TLS_MODEL_INITIAL_EXEC:
11748 if (TARGET_64BIT)
11749 {
11750 pic = NULL;
11751 type = UNSPEC_GOTNTPOFF;
11752 }
11753 else if (flag_pic)
11754 {
11755 if (reload_in_progress)
11756 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11757 pic = pic_offset_table_rtx;
11758 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11759 }
11760 else if (!TARGET_ANY_GNU_TLS)
11761 {
11762 pic = gen_reg_rtx (Pmode);
11763 emit_insn (gen_set_got (pic));
11764 type = UNSPEC_GOTTPOFF;
11765 }
11766 else
11767 {
11768 pic = NULL;
11769 type = UNSPEC_INDNTPOFF;
11770 }
11771
11772 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11773 off = gen_rtx_CONST (Pmode, off);
11774 if (pic)
11775 off = gen_rtx_PLUS (Pmode, pic, off);
11776 off = gen_const_mem (Pmode, off);
11777 set_mem_alias_set (off, ix86_GOT_alias_set ());
11778
11779 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11780 {
11781 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11782 off = force_reg (Pmode, off);
11783 return gen_rtx_PLUS (Pmode, base, off);
11784 }
11785 else
11786 {
11787 base = get_thread_pointer (true);
11788 dest = gen_reg_rtx (Pmode);
11789 emit_insn (gen_subsi3 (dest, base, off));
11790 }
11791 break;
11792
11793 case TLS_MODEL_LOCAL_EXEC:
11794 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11795 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11796 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11797 off = gen_rtx_CONST (Pmode, off);
11798
11799 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11800 {
11801 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11802 return gen_rtx_PLUS (Pmode, base, off);
11803 }
11804 else
11805 {
11806 base = get_thread_pointer (true);
11807 dest = gen_reg_rtx (Pmode);
11808 emit_insn (gen_subsi3 (dest, base, off));
11809 }
11810 break;
11811
11812 default:
11813 gcc_unreachable ();
11814 }
11815
11816 return dest;
11817 }
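
/* Illustrative sketch of the simplest cases handled above: with GNU TLS,
   a local-exec reference to "foo" becomes thread-pointer + foo@NTPOFF,
   which later prints as %fs:foo@tpoff (64-bit) or %gs:foo@ntpoff
   (32-bit); an initial-exec reference loads the offset from the GOT
   first and then adds it to the thread pointer in the same way.  */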
11818
11819 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11820 to symbol DECL. */
11821
11822 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11823 htab_t dllimport_map;
11824
11825 static tree
11826 get_dllimport_decl (tree decl)
11827 {
11828 struct tree_map *h, in;
11829 void **loc;
11830 const char *name;
11831 const char *prefix;
11832 size_t namelen, prefixlen;
11833 char *imp_name;
11834 tree to;
11835 rtx rtl;
11836
11837 if (!dllimport_map)
11838 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11839
11840 in.hash = htab_hash_pointer (decl);
11841 in.base.from = decl;
11842 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11843 h = (struct tree_map *) *loc;
11844 if (h)
11845 return h->to;
11846
11847 *loc = h = ggc_alloc_tree_map ();
11848 h->hash = in.hash;
11849 h->base.from = decl;
11850 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11851 VAR_DECL, NULL, ptr_type_node);
11852 DECL_ARTIFICIAL (to) = 1;
11853 DECL_IGNORED_P (to) = 1;
11854 DECL_EXTERNAL (to) = 1;
11855 TREE_READONLY (to) = 1;
11856
11857 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11858 name = targetm.strip_name_encoding (name);
11859 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11860 ? "*__imp_" : "*__imp__";
11861 namelen = strlen (name);
11862 prefixlen = strlen (prefix);
11863 imp_name = (char *) alloca (namelen + prefixlen + 1);
11864 memcpy (imp_name, prefix, prefixlen);
11865 memcpy (imp_name + prefixlen, name, namelen + 1);
11866
11867 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11868 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11869 SET_SYMBOL_REF_DECL (rtl, to);
11870 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11871
11872 rtl = gen_const_mem (Pmode, rtl);
11873 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11874
11875 SET_DECL_RTL (to, rtl);
11876 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11877
11878 return to;
11879 }
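
/* For illustration only: on a 32-bit mingw-style target (where
   user_label_prefix is "_"), a dllimport'ed declaration "foo" is mapped
   by this function to an artificial read-only pointer variable whose
   DECL_RTL is (mem (symbol_ref "*__imp__foo")), so that uses of foo go
   through the import-table slot __imp__foo.  */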
11880
11881 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11882    true if we require the result to be in a register.  */
11883
11884 static rtx
11885 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11886 {
11887 tree imp_decl;
11888 rtx x;
11889
11890 gcc_assert (SYMBOL_REF_DECL (symbol));
11891 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11892
11893 x = DECL_RTL (imp_decl);
11894 if (want_reg)
11895 x = force_reg (Pmode, x);
11896 return x;
11897 }
11898
11899 /* Try machine-dependent ways of modifying an illegitimate address
11900 to be legitimate. If we find one, return the new, valid address.
11901 This macro is used in only one place: `memory_address' in explow.c.
11902
11903 OLDX is the address as it was before break_out_memory_refs was called.
11904 In some cases it is useful to look at this to decide what needs to be done.
11905
11906 It is always safe for this macro to do nothing. It exists to recognize
11907 opportunities to optimize the output.
11908
11909 For the 80386, we handle X+REG by loading X into a register R and
11910 using R+REG. R will go in a general reg and indexing will be used.
11911 However, if REG is a broken-out memory address or multiplication,
11912 nothing needs to be done because REG can certainly go in a general reg.
11913
11914 When -fpic is used, special handling is needed for symbolic references.
11915 See comments by legitimize_pic_address in i386.c for details. */
11916
11917 static rtx
11918 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11919 enum machine_mode mode)
11920 {
11921 int changed = 0;
11922 unsigned log;
11923
11924 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11925 if (log)
11926 return legitimize_tls_address (x, (enum tls_model) log, false);
11927 if (GET_CODE (x) == CONST
11928 && GET_CODE (XEXP (x, 0)) == PLUS
11929 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11930 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11931 {
11932 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11933 (enum tls_model) log, false);
11934 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11935 }
11936
11937 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11938 {
11939 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11940 return legitimize_dllimport_symbol (x, true);
11941 if (GET_CODE (x) == CONST
11942 && GET_CODE (XEXP (x, 0)) == PLUS
11943 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11944 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11945 {
11946 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11947 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11948 }
11949 }
11950
11951 if (flag_pic && SYMBOLIC_CONST (x))
11952 return legitimize_pic_address (x, 0);
11953
11954   /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
11955 if (GET_CODE (x) == ASHIFT
11956 && CONST_INT_P (XEXP (x, 1))
11957 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11958 {
11959 changed = 1;
11960 log = INTVAL (XEXP (x, 1));
11961 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11962 GEN_INT (1 << log));
11963 }
11964
11965 if (GET_CODE (x) == PLUS)
11966 {
11967 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11968
11969 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11970 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11971 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11972 {
11973 changed = 1;
11974 log = INTVAL (XEXP (XEXP (x, 0), 1));
11975 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11976 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11977 GEN_INT (1 << log));
11978 }
11979
11980 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11981 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11982 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11983 {
11984 changed = 1;
11985 log = INTVAL (XEXP (XEXP (x, 1), 1));
11986 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11987 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11988 GEN_INT (1 << log));
11989 }
11990
11991 /* Put multiply first if it isn't already. */
11992 if (GET_CODE (XEXP (x, 1)) == MULT)
11993 {
11994 rtx tmp = XEXP (x, 0);
11995 XEXP (x, 0) = XEXP (x, 1);
11996 XEXP (x, 1) = tmp;
11997 changed = 1;
11998 }
11999
12000 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12001 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12002 created by virtual register instantiation, register elimination, and
12003 similar optimizations. */
12004 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12005 {
12006 changed = 1;
12007 x = gen_rtx_PLUS (Pmode,
12008 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12009 XEXP (XEXP (x, 1), 0)),
12010 XEXP (XEXP (x, 1), 1));
12011 }
12012
12013 /* Canonicalize
12014 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12015 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12016 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12017 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12018 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12019 && CONSTANT_P (XEXP (x, 1)))
12020 {
12021 rtx constant;
12022 rtx other = NULL_RTX;
12023
12024 if (CONST_INT_P (XEXP (x, 1)))
12025 {
12026 constant = XEXP (x, 1);
12027 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12028 }
12029 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12030 {
12031 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12032 other = XEXP (x, 1);
12033 }
12034 else
12035 constant = 0;
12036
12037 if (constant)
12038 {
12039 changed = 1;
12040 x = gen_rtx_PLUS (Pmode,
12041 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12042 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12043 plus_constant (other, INTVAL (constant)));
12044 }
12045 }
12046
12047 if (changed && ix86_legitimate_address_p (mode, x, false))
12048 return x;
12049
12050 if (GET_CODE (XEXP (x, 0)) == MULT)
12051 {
12052 changed = 1;
12053 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12054 }
12055
12056 if (GET_CODE (XEXP (x, 1)) == MULT)
12057 {
12058 changed = 1;
12059 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12060 }
12061
12062 if (changed
12063 && REG_P (XEXP (x, 1))
12064 && REG_P (XEXP (x, 0)))
12065 return x;
12066
12067 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12068 {
12069 changed = 1;
12070 x = legitimize_pic_address (x, 0);
12071 }
12072
12073 if (changed && ix86_legitimate_address_p (mode, x, false))
12074 return x;
12075
12076 if (REG_P (XEXP (x, 0)))
12077 {
12078 rtx temp = gen_reg_rtx (Pmode);
12079 rtx val = force_operand (XEXP (x, 1), temp);
12080 if (val != temp)
12081 emit_move_insn (temp, val);
12082
12083 XEXP (x, 1) = temp;
12084 return x;
12085 }
12086
12087 else if (REG_P (XEXP (x, 1)))
12088 {
12089 rtx temp = gen_reg_rtx (Pmode);
12090 rtx val = force_operand (XEXP (x, 0), temp);
12091 if (val != temp)
12092 emit_move_insn (temp, val);
12093
12094 XEXP (x, 0) = temp;
12095 return x;
12096 }
12097 }
12098
12099 return x;
12100 }
12101 \f
12102 /* Print an integer constant expression in assembler syntax. Addition
12103 and subtraction are the only arithmetic that may appear in these
12104 expressions. FILE is the stdio stream to write to, X is the rtx, and
12105 CODE is the operand print code from the output string. */
12106
12107 static void
12108 output_pic_addr_const (FILE *file, rtx x, int code)
12109 {
12110 char buf[256];
12111
12112 switch (GET_CODE (x))
12113 {
12114 case PC:
12115 gcc_assert (flag_pic);
12116 putc ('.', file);
12117 break;
12118
12119 case SYMBOL_REF:
12120 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12121 output_addr_const (file, x);
12122 else
12123 {
12124 const char *name = XSTR (x, 0);
12125
12126 /* Mark the decl as referenced so that cgraph will
12127 output the function. */
12128 if (SYMBOL_REF_DECL (x))
12129 mark_decl_referenced (SYMBOL_REF_DECL (x));
12130
12131 #if TARGET_MACHO
12132 if (MACHOPIC_INDIRECT
12133 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12134 name = machopic_indirection_name (x, /*stub_p=*/true);
12135 #endif
12136 assemble_name (file, name);
12137 }
12138 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12139 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12140 fputs ("@PLT", file);
12141 break;
12142
12143 case LABEL_REF:
12144 x = XEXP (x, 0);
12145 /* FALLTHRU */
12146 case CODE_LABEL:
12147 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12148 assemble_name (asm_out_file, buf);
12149 break;
12150
12151 case CONST_INT:
12152 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12153 break;
12154
12155 case CONST:
12156 /* This used to output parentheses around the expression,
12157 but that does not work on the 386 (either ATT or BSD assembler). */
12158 output_pic_addr_const (file, XEXP (x, 0), code);
12159 break;
12160
12161 case CONST_DOUBLE:
12162 if (GET_MODE (x) == VOIDmode)
12163 {
12164 /* We can use %d if the number is <32 bits and positive. */
12165 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12166 fprintf (file, "0x%lx%08lx",
12167 (unsigned long) CONST_DOUBLE_HIGH (x),
12168 (unsigned long) CONST_DOUBLE_LOW (x));
12169 else
12170 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12171 }
12172 else
12173 /* We can't handle floating point constants;
12174 TARGET_PRINT_OPERAND must handle them. */
12175 output_operand_lossage ("floating constant misused");
12176 break;
12177
12178 case PLUS:
12179 /* Some assemblers need integer constants to appear first. */
12180 if (CONST_INT_P (XEXP (x, 0)))
12181 {
12182 output_pic_addr_const (file, XEXP (x, 0), code);
12183 putc ('+', file);
12184 output_pic_addr_const (file, XEXP (x, 1), code);
12185 }
12186 else
12187 {
12188 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12189 output_pic_addr_const (file, XEXP (x, 1), code);
12190 putc ('+', file);
12191 output_pic_addr_const (file, XEXP (x, 0), code);
12192 }
12193 break;
12194
12195 case MINUS:
12196 if (!TARGET_MACHO)
12197 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12198 output_pic_addr_const (file, XEXP (x, 0), code);
12199 putc ('-', file);
12200 output_pic_addr_const (file, XEXP (x, 1), code);
12201 if (!TARGET_MACHO)
12202 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12203 break;
12204
12205 case UNSPEC:
12206 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12207 {
12208 bool f = i386_asm_output_addr_const_extra (file, x);
12209 gcc_assert (f);
12210 break;
12211 }
12212
12213 gcc_assert (XVECLEN (x, 0) == 1);
12214 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12215 switch (XINT (x, 1))
12216 {
12217 case UNSPEC_GOT:
12218 fputs ("@GOT", file);
12219 break;
12220 case UNSPEC_GOTOFF:
12221 fputs ("@GOTOFF", file);
12222 break;
12223 case UNSPEC_PLTOFF:
12224 fputs ("@PLTOFF", file);
12225 break;
12226 case UNSPEC_GOTPCREL:
12227 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12228 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12229 break;
12230 case UNSPEC_GOTTPOFF:
12231 /* FIXME: This might be @TPOFF in Sun ld too. */
12232 fputs ("@gottpoff", file);
12233 break;
12234 case UNSPEC_TPOFF:
12235 fputs ("@tpoff", file);
12236 break;
12237 case UNSPEC_NTPOFF:
12238 if (TARGET_64BIT)
12239 fputs ("@tpoff", file);
12240 else
12241 fputs ("@ntpoff", file);
12242 break;
12243 case UNSPEC_DTPOFF:
12244 fputs ("@dtpoff", file);
12245 break;
12246 case UNSPEC_GOTNTPOFF:
12247 if (TARGET_64BIT)
12248 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12249 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12250 else
12251 fputs ("@gotntpoff", file);
12252 break;
12253 case UNSPEC_INDNTPOFF:
12254 fputs ("@indntpoff", file);
12255 break;
12256 #if TARGET_MACHO
12257 case UNSPEC_MACHOPIC_OFFSET:
12258 putc ('-', file);
12259 machopic_output_function_base_name (file);
12260 break;
12261 #endif
12262 default:
12263 output_operand_lossage ("invalid UNSPEC as operand");
12264 break;
12265 }
12266 break;
12267
12268 default:
12269 output_operand_lossage ("invalid expression as operand");
12270 }
12271 }
12272
12273 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12274 We need to emit DTP-relative relocations. */
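/* Sketch of the output (assuming ASM_LONG expands to ".long"): for SIZE == 4
   this prints ".long foo@dtpoff"; for SIZE == 8 it prints
   ".long foo@dtpoff, 0".  */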
12275
12276 static void ATTRIBUTE_UNUSED
12277 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12278 {
12279 fputs (ASM_LONG, file);
12280 output_addr_const (file, x);
12281 fputs ("@dtpoff", file);
12282 switch (size)
12283 {
12284 case 4:
12285 break;
12286 case 8:
12287 fputs (", 0", file);
12288 break;
12289 default:
12290 gcc_unreachable ();
12291 }
12292 }
12293
12294 /* Return true if X is a representation of the PIC register. This copes
12295 with calls from ix86_find_base_term, where the register might have
12296 been replaced by a cselib value. */
12297
12298 static bool
12299 ix86_pic_register_p (rtx x)
12300 {
12301 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12302 return (pic_offset_table_rtx
12303 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12304 else
12305 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12306 }
12307
12308 /* In the name of slightly smaller debug output, and to cater to
12309 general assembler lossage, recognize PIC+GOTOFF and turn it back
12310 into a direct symbol reference.
12311
12312 On Darwin, this is necessary to avoid a crash, because Darwin
12313 has a different PIC label for each routine but the DWARF debugging
12314 information is not associated with any particular routine, so it's
12315 necessary to remove references to the PIC label from RTL stored by
12316 the DWARF output code. */
12317
12318 static rtx
12319 ix86_delegitimize_address (rtx x)
12320 {
12321 rtx orig_x = delegitimize_mem_from_attrs (x);
12322 /* addend is NULL or some rtx if x is something+GOTOFF where
12323 something doesn't include the PIC register. */
12324 rtx addend = NULL_RTX;
12325 /* reg_addend is NULL or a multiple of some register. */
12326 rtx reg_addend = NULL_RTX;
12327 /* const_addend is NULL or a const_int. */
12328 rtx const_addend = NULL_RTX;
12329 /* This is the result, or NULL. */
12330 rtx result = NULL_RTX;
12331
12332 x = orig_x;
12333
12334 if (MEM_P (x))
12335 x = XEXP (x, 0);
12336
12337 if (TARGET_64BIT)
12338 {
12339 if (GET_CODE (x) != CONST
12340 || GET_CODE (XEXP (x, 0)) != UNSPEC
12341 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
12342 || !MEM_P (orig_x))
12343 return orig_x;
12344 x = XVECEXP (XEXP (x, 0), 0, 0);
12345 if (GET_MODE (orig_x) != Pmode)
12346 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
12347 return x;
12348 }
12349
12350 if (GET_CODE (x) != PLUS
12351 || GET_CODE (XEXP (x, 1)) != CONST)
12352 return orig_x;
12353
12354 if (ix86_pic_register_p (XEXP (x, 0)))
12355 /* %ebx + GOT/GOTOFF */
12356 ;
12357 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12358 {
12359 /* %ebx + %reg * scale + GOT/GOTOFF */
12360 reg_addend = XEXP (x, 0);
12361 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12362 reg_addend = XEXP (reg_addend, 1);
12363 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12364 reg_addend = XEXP (reg_addend, 0);
12365 else
12366 {
12367 reg_addend = NULL_RTX;
12368 addend = XEXP (x, 0);
12369 }
12370 }
12371 else
12372 addend = XEXP (x, 0);
12373
12374 x = XEXP (XEXP (x, 1), 0);
12375 if (GET_CODE (x) == PLUS
12376 && CONST_INT_P (XEXP (x, 1)))
12377 {
12378 const_addend = XEXP (x, 1);
12379 x = XEXP (x, 0);
12380 }
12381
12382 if (GET_CODE (x) == UNSPEC
12383 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12384 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
12385 result = XVECEXP (x, 0, 0);
12386
12387 if (TARGET_MACHO && darwin_local_data_pic (x)
12388 && !MEM_P (orig_x))
12389 result = XVECEXP (x, 0, 0);
12390
12391 if (! result)
12392 return orig_x;
12393
12394 if (const_addend)
12395 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12396 if (reg_addend)
12397 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12398 if (addend)
12399 {
12400 /* If the rest of original X doesn't involve the PIC register, add
12401 addend and subtract pic_offset_table_rtx. This can happen e.g.
12402 for code like:
12403 leal (%ebx, %ecx, 4), %ecx
12404 ...
12405 movl foo@GOTOFF(%ecx), %edx
12406 in which case we return (%ecx - %ebx) + foo. */
12407 if (pic_offset_table_rtx)
12408 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12409 pic_offset_table_rtx),
12410 result);
12411 else
12412 return orig_x;
12413 }
12414 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12415 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12416 return result;
12417 }
12418
12419 /* If X is a machine specific address (i.e. a symbol or label being
12420 referenced as a displacement from the GOT implemented using an
12421 UNSPEC), then return the base term. Otherwise return X. */
12422
12423 rtx
12424 ix86_find_base_term (rtx x)
12425 {
12426 rtx term;
12427
12428 if (TARGET_64BIT)
12429 {
12430 if (GET_CODE (x) != CONST)
12431 return x;
12432 term = XEXP (x, 0);
12433 if (GET_CODE (term) == PLUS
12434 && (CONST_INT_P (XEXP (term, 1))
12435 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12436 term = XEXP (term, 0);
12437 if (GET_CODE (term) != UNSPEC
12438 || XINT (term, 1) != UNSPEC_GOTPCREL)
12439 return x;
12440
12441 return XVECEXP (term, 0, 0);
12442 }
12443
12444 return ix86_delegitimize_address (x);
12445 }
12446 \f
12447 static void
12448 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
12449 int fp, FILE *file)
12450 {
12451 const char *suffix;
12452
12453 if (mode == CCFPmode || mode == CCFPUmode)
12454 {
12455 code = ix86_fp_compare_code_to_integer (code);
12456 mode = CCmode;
12457 }
12458 if (reverse)
12459 code = reverse_condition (code);
12460
12461 switch (code)
12462 {
12463 case EQ:
12464 switch (mode)
12465 {
12466 case CCAmode:
12467 suffix = "a";
12468 break;
12469
12470 case CCCmode:
12471 suffix = "c";
12472 break;
12473
12474 case CCOmode:
12475 suffix = "o";
12476 break;
12477
12478 case CCSmode:
12479 suffix = "s";
12480 break;
12481
12482 default:
12483 suffix = "e";
12484 }
12485 break;
12486 case NE:
12487 switch (mode)
12488 {
12489 case CCAmode:
12490 suffix = "na";
12491 break;
12492
12493 case CCCmode:
12494 suffix = "nc";
12495 break;
12496
12497 case CCOmode:
12498 suffix = "no";
12499 break;
12500
12501 case CCSmode:
12502 suffix = "ns";
12503 break;
12504
12505 default:
12506 suffix = "ne";
12507 }
12508 break;
12509 case GT:
12510 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12511 suffix = "g";
12512 break;
12513 case GTU:
12514 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12515 Those same assemblers have the same but opposite lossage on cmov. */
12516 if (mode == CCmode)
12517 suffix = fp ? "nbe" : "a";
12518 else if (mode == CCCmode)
12519 suffix = "b";
12520 else
12521 gcc_unreachable ();
12522 break;
12523 case LT:
12524 switch (mode)
12525 {
12526 case CCNOmode:
12527 case CCGOCmode:
12528 suffix = "s";
12529 break;
12530
12531 case CCmode:
12532 case CCGCmode:
12533 suffix = "l";
12534 break;
12535
12536 default:
12537 gcc_unreachable ();
12538 }
12539 break;
12540 case LTU:
12541 gcc_assert (mode == CCmode || mode == CCCmode);
12542 suffix = "b";
12543 break;
12544 case GE:
12545 switch (mode)
12546 {
12547 case CCNOmode:
12548 case CCGOCmode:
12549 suffix = "ns";
12550 break;
12551
12552 case CCmode:
12553 case CCGCmode:
12554 suffix = "ge";
12555 break;
12556
12557 default:
12558 gcc_unreachable ();
12559 }
12560 break;
12561 case GEU:
12562 /* ??? As above. */
12563 gcc_assert (mode == CCmode || mode == CCCmode);
12564 suffix = fp ? "nb" : "ae";
12565 break;
12566 case LE:
12567 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12568 suffix = "le";
12569 break;
12570 case LEU:
12571 /* ??? As above. */
12572 if (mode == CCmode)
12573 suffix = "be";
12574 else if (mode == CCCmode)
12575 suffix = fp ? "nb" : "ae";
12576 else
12577 gcc_unreachable ();
12578 break;
12579 case UNORDERED:
12580 suffix = fp ? "u" : "p";
12581 break;
12582 case ORDERED:
12583 suffix = fp ? "nu" : "np";
12584 break;
12585 default:
12586 gcc_unreachable ();
12587 }
12588 fputs (suffix, file);
12589 }
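/* Illustrative examples for the routine above: (gt ...) in CCGCmode prints
   "g" and, with REVERSE set, "le"; an unsigned (gtu ...) in CCmode prints
   "a", or "nbe" when the FP (fcmov) spelling is requested.  */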
12590
12591 /* Print the name of register X to FILE based on its machine mode and number.
12592 If CODE is 'w', pretend the mode is HImode.
12593 If CODE is 'b', pretend the mode is QImode.
12594 If CODE is 'k', pretend the mode is SImode.
12595 If CODE is 'q', pretend the mode is DImode.
12596 If CODE is 'x', pretend the mode is V4SFmode.
12597 If CODE is 't', pretend the mode is V8SFmode.
12598 If CODE is 'h', pretend the reg is the 'high' byte register.
12599 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
12600 If CODE is 'd', duplicate the operand for an AVX instruction.
12601 */
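/* As a hypothetical illustration: for the SImode register %eax, code 'b'
   prints "al", 'w' prints "ax", 'k' prints "eax", 'q' prints "rax" (64-bit
   only) and 'h' prints "ah"; in AT&T syntax each is preceded by '%'.  */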
12602
12603 void
12604 print_reg (rtx x, int code, FILE *file)
12605 {
12606 const char *reg;
12607 bool duplicated = code == 'd' && TARGET_AVX;
12608
12609 gcc_assert (x == pc_rtx
12610 || (REGNO (x) != ARG_POINTER_REGNUM
12611 && REGNO (x) != FRAME_POINTER_REGNUM
12612 && REGNO (x) != FLAGS_REG
12613 && REGNO (x) != FPSR_REG
12614 && REGNO (x) != FPCR_REG));
12615
12616 if (ASSEMBLER_DIALECT == ASM_ATT)
12617 putc ('%', file);
12618
12619 if (x == pc_rtx)
12620 {
12621 gcc_assert (TARGET_64BIT);
12622 fputs ("rip", file);
12623 return;
12624 }
12625
12626 if (code == 'w' || MMX_REG_P (x))
12627 code = 2;
12628 else if (code == 'b')
12629 code = 1;
12630 else if (code == 'k')
12631 code = 4;
12632 else if (code == 'q')
12633 code = 8;
12634 else if (code == 'y')
12635 code = 3;
12636 else if (code == 'h')
12637 code = 0;
12638 else if (code == 'x')
12639 code = 16;
12640 else if (code == 't')
12641 code = 32;
12642 else
12643 code = GET_MODE_SIZE (GET_MODE (x));
12644
12645 /* Irritatingly, AMD extended registers use a different naming convention
12646 from the normal registers. */
12647 if (REX_INT_REG_P (x))
12648 {
12649 gcc_assert (TARGET_64BIT);
12650 switch (code)
12651 {
12652 case 0:
12653 error ("extended registers have no high halves");
12654 break;
12655 case 1:
12656 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12657 break;
12658 case 2:
12659 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12660 break;
12661 case 4:
12662 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12663 break;
12664 case 8:
12665 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12666 break;
12667 default:
12668 error ("unsupported operand size for extended register");
12669 break;
12670 }
12671 return;
12672 }
12673
12674 reg = NULL;
12675 switch (code)
12676 {
12677 case 3:
12678 if (STACK_TOP_P (x))
12679 {
12680 reg = "st(0)";
12681 break;
12682 }
12683 /* FALLTHRU */
12684 case 8:
12685 case 4:
12686 case 12:
12687 if (! ANY_FP_REG_P (x))
12688 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12689 /* FALLTHRU */
12690 case 16:
12691 case 2:
12692 normal:
12693 reg = hi_reg_name[REGNO (x)];
12694 break;
12695 case 1:
12696 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12697 goto normal;
12698 reg = qi_reg_name[REGNO (x)];
12699 break;
12700 case 0:
12701 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12702 goto normal;
12703 reg = qi_high_reg_name[REGNO (x)];
12704 break;
12705 case 32:
12706 if (SSE_REG_P (x))
12707 {
12708 gcc_assert (!duplicated);
12709 putc ('y', file);
12710 fputs (hi_reg_name[REGNO (x)] + 1, file);
12711 return;
12712 }
12713 break;
12714 default:
12715 gcc_unreachable ();
12716 }
12717
12718 fputs (reg, file);
12719 if (duplicated)
12720 {
12721 if (ASSEMBLER_DIALECT == ASM_ATT)
12722 fprintf (file, ", %%%s", reg);
12723 else
12724 fprintf (file, ", %s", reg);
12725 }
12726 }
12727
12728 /* Locate some local-dynamic symbol still in use by this function
12729 so that we can print its name in some tls_local_dynamic_base
12730 pattern. */
12731
12732 static int
12733 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12734 {
12735 rtx x = *px;
12736
12737 if (GET_CODE (x) == SYMBOL_REF
12738 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12739 {
12740 cfun->machine->some_ld_name = XSTR (x, 0);
12741 return 1;
12742 }
12743
12744 return 0;
12745 }
12746
12747 static const char *
12748 get_some_local_dynamic_name (void)
12749 {
12750 rtx insn;
12751
12752 if (cfun->machine->some_ld_name)
12753 return cfun->machine->some_ld_name;
12754
12755 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12756 if (NONDEBUG_INSN_P (insn)
12757 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12758 return cfun->machine->some_ld_name;
12759
12760 return NULL;
12761 }
12762
12763 /* Meaning of CODE:
12764 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12765 C -- print opcode suffix for set/cmov insn.
12766 c -- like C, but print reversed condition
12767 F,f -- likewise, but for floating-point.
12768 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12769 otherwise nothing
12770 R -- print the prefix for register names.
12771 z -- print the opcode suffix for the size of the current operand.
12772 Z -- likewise, with special suffixes for x87 instructions.
12773 * -- print a star (in certain assembler syntax)
12774 A -- print an absolute memory reference.
12775 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12776 s -- print a shift double count, followed by the assembler's argument
12777 delimiter.
12778 b -- print the QImode name of the register for the indicated operand.
12779 %b0 would print %al if operands[0] is reg 0.
12780 w -- likewise, print the HImode name of the register.
12781 k -- likewise, print the SImode name of the register.
12782 q -- likewise, print the DImode name of the register.
12783 x -- likewise, print the V4SFmode name of the register.
12784 t -- likewise, print the V8SFmode name of the register.
12785 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12786 y -- print "st(0)" instead of "st" as a register.
12787 d -- print duplicated register operand for AVX instruction.
12788 D -- print condition for SSE cmp instruction.
12789 P -- if PIC, print an @PLT suffix.
12790 X -- don't print any sort of PIC '@' suffix for a symbol.
12791 & -- print some in-use local-dynamic symbol name.
12792 H -- print a memory address offset by 8; used for sse high-parts
12793 Y -- print condition for XOP pcom* instruction.
12794 + -- print a branch hint as 'cs' or 'ds' prefix
12795 ; -- print a semicolon (after prefixes due to bug in older gas).
12796 @ -- print a segment register of thread base pointer load
12797 */
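/* Illustrative example (not taken from the insn templates themselves): in a
   template such as "add%z0\t{%1, %0|%0, %1}", %z0 expands to 'b', 'w', 'l'
   or 'q' from the mode of operand 0 in AT&T syntax, and to nothing in Intel
   syntax, where sizes are implied by register names or the "... PTR" prefix.  */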
12798
12799 void
12800 ix86_print_operand (FILE *file, rtx x, int code)
12801 {
12802 if (code)
12803 {
12804 switch (code)
12805 {
12806 case '*':
12807 if (ASSEMBLER_DIALECT == ASM_ATT)
12808 putc ('*', file);
12809 return;
12810
12811 case '&':
12812 {
12813 const char *name = get_some_local_dynamic_name ();
12814 if (name == NULL)
12815 output_operand_lossage ("'%%&' used without any "
12816 "local dynamic TLS references");
12817 else
12818 assemble_name (file, name);
12819 return;
12820 }
12821
12822 case 'A':
12823 switch (ASSEMBLER_DIALECT)
12824 {
12825 case ASM_ATT:
12826 putc ('*', file);
12827 break;
12828
12829 case ASM_INTEL:
12830 /* Intel syntax. For absolute addresses, registers should not
12831 be surrounded by brackets. */
12832 if (!REG_P (x))
12833 {
12834 putc ('[', file);
12835 ix86_print_operand (file, x, 0);
12836 putc (']', file);
12837 return;
12838 }
12839 break;
12840
12841 default:
12842 gcc_unreachable ();
12843 }
12844
12845 ix86_print_operand (file, x, 0);
12846 return;
12847
12848
12849 case 'L':
12850 if (ASSEMBLER_DIALECT == ASM_ATT)
12851 putc ('l', file);
12852 return;
12853
12854 case 'W':
12855 if (ASSEMBLER_DIALECT == ASM_ATT)
12856 putc ('w', file);
12857 return;
12858
12859 case 'B':
12860 if (ASSEMBLER_DIALECT == ASM_ATT)
12861 putc ('b', file);
12862 return;
12863
12864 case 'Q':
12865 if (ASSEMBLER_DIALECT == ASM_ATT)
12866 putc ('l', file);
12867 return;
12868
12869 case 'S':
12870 if (ASSEMBLER_DIALECT == ASM_ATT)
12871 putc ('s', file);
12872 return;
12873
12874 case 'T':
12875 if (ASSEMBLER_DIALECT == ASM_ATT)
12876 putc ('t', file);
12877 return;
12878
12879 case 'z':
12880 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12881 {
12882 /* Opcodes don't get size suffixes if using Intel opcodes. */
12883 if (ASSEMBLER_DIALECT == ASM_INTEL)
12884 return;
12885
12886 switch (GET_MODE_SIZE (GET_MODE (x)))
12887 {
12888 case 1:
12889 putc ('b', file);
12890 return;
12891
12892 case 2:
12893 putc ('w', file);
12894 return;
12895
12896 case 4:
12897 putc ('l', file);
12898 return;
12899
12900 case 8:
12901 putc ('q', file);
12902 return;
12903
12904 default:
12905 output_operand_lossage
12906 ("invalid operand size for operand code '%c'", code);
12907 return;
12908 }
12909 }
12910
12911 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12912 warning
12913 (0, "non-integer operand used with operand code '%c'", code);
12914 /* FALLTHRU */
12915
12916 case 'Z':
12917 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12918 if (ASSEMBLER_DIALECT == ASM_INTEL)
12919 return;
12920
12921 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12922 {
12923 switch (GET_MODE_SIZE (GET_MODE (x)))
12924 {
12925 case 2:
12926 #ifdef HAVE_AS_IX86_FILDS
12927 putc ('s', file);
12928 #endif
12929 return;
12930
12931 case 4:
12932 putc ('l', file);
12933 return;
12934
12935 case 8:
12936 #ifdef HAVE_AS_IX86_FILDQ
12937 putc ('q', file);
12938 #else
12939 fputs ("ll", file);
12940 #endif
12941 return;
12942
12943 default:
12944 break;
12945 }
12946 }
12947 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12948 {
12949 /* 387 opcodes don't get size suffixes
12950 if the operands are registers. */
12951 if (STACK_REG_P (x))
12952 return;
12953
12954 switch (GET_MODE_SIZE (GET_MODE (x)))
12955 {
12956 case 4:
12957 putc ('s', file);
12958 return;
12959
12960 case 8:
12961 putc ('l', file);
12962 return;
12963
12964 case 12:
12965 case 16:
12966 putc ('t', file);
12967 return;
12968
12969 default:
12970 break;
12971 }
12972 }
12973 else
12974 {
12975 output_operand_lossage
12976 ("invalid operand type used with operand code '%c'", code);
12977 return;
12978 }
12979
12980 output_operand_lossage
12981 ("invalid operand size for operand code '%c'", code);
12982 return;
12983
12984 case 'd':
12985 case 'b':
12986 case 'w':
12987 case 'k':
12988 case 'q':
12989 case 'h':
12990 case 't':
12991 case 'y':
12992 case 'x':
12993 case 'X':
12994 case 'P':
12995 break;
12996
12997 case 's':
12998 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12999 {
13000 ix86_print_operand (file, x, 0);
13001 fputs (", ", file);
13002 }
13003 return;
13004
13005 case 'D':
13006 /* Little bit of braindamage here. The SSE compare instructions
13007 use completely different names for the comparisons than the
13008 fp conditional moves do. */
13009 if (TARGET_AVX)
13010 {
13011 switch (GET_CODE (x))
13012 {
13013 case EQ:
13014 fputs ("eq", file);
13015 break;
13016 case UNEQ:
13017 fputs ("eq_us", file);
13018 break;
13019 case LT:
13020 fputs ("lt", file);
13021 break;
13022 case UNLT:
13023 fputs ("nge", file);
13024 break;
13025 case LE:
13026 fputs ("le", file);
13027 break;
13028 case UNLE:
13029 fputs ("ngt", file);
13030 break;
13031 case UNORDERED:
13032 fputs ("unord", file);
13033 break;
13034 case NE:
13035 fputs ("neq", file);
13036 break;
13037 case LTGT:
13038 fputs ("neq_oq", file);
13039 break;
13040 case GE:
13041 fputs ("ge", file);
13042 break;
13043 case UNGE:
13044 fputs ("nlt", file);
13045 break;
13046 case GT:
13047 fputs ("gt", file);
13048 break;
13049 case UNGT:
13050 fputs ("nle", file);
13051 break;
13052 case ORDERED:
13053 fputs ("ord", file);
13054 break;
13055 default:
13056 output_operand_lossage ("operand is not a condition code, "
13057 "invalid operand code 'D'");
13058 return;
13059 }
13060 }
13061 else
13062 {
13063 switch (GET_CODE (x))
13064 {
13065 case EQ:
13066 case UNEQ:
13067 fputs ("eq", file);
13068 break;
13069 case LT:
13070 case UNLT:
13071 fputs ("lt", file);
13072 break;
13073 case LE:
13074 case UNLE:
13075 fputs ("le", file);
13076 break;
13077 case UNORDERED:
13078 fputs ("unord", file);
13079 break;
13080 case NE:
13081 case LTGT:
13082 fputs ("neq", file);
13083 break;
13084 case UNGE:
13085 case GE:
13086 fputs ("nlt", file);
13087 break;
13088 case UNGT:
13089 case GT:
13090 fputs ("nle", file);
13091 break;
13092 case ORDERED:
13093 fputs ("ord", file);
13094 break;
13095 default:
13096 output_operand_lossage ("operand is not a condition code, "
13097 "invalid operand code 'D'");
13098 return;
13099 }
13100 }
13101 return;
13102 case 'O':
13103 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13104 if (ASSEMBLER_DIALECT == ASM_ATT)
13105 {
13106 switch (GET_MODE (x))
13107 {
13108 case HImode: putc ('w', file); break;
13109 case SImode:
13110 case SFmode: putc ('l', file); break;
13111 case DImode:
13112 case DFmode: putc ('q', file); break;
13113 default: gcc_unreachable ();
13114 }
13115 putc ('.', file);
13116 }
13117 #endif
13118 return;
13119 case 'C':
13120 if (!COMPARISON_P (x))
13121 {
13122 output_operand_lossage ("operand is neither a constant nor a "
13123 "condition code, invalid operand code "
13124 "'C'");
13125 return;
13126 }
13127 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13128 return;
13129 case 'F':
13130 if (!COMPARISON_P (x))
13131 {
13132 output_operand_lossage ("operand is neither a constant nor a "
13133 "condition code, invalid operand code "
13134 "'F'");
13135 return;
13136 }
13137 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13138 if (ASSEMBLER_DIALECT == ASM_ATT)
13139 putc ('.', file);
13140 #endif
13141 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13142 return;
13143
13144 /* Like above, but reverse condition */
13145 case 'c':
13146 /* Check to see if argument to %c is really a constant
13147 and not a condition code which needs to be reversed. */
13148 if (!COMPARISON_P (x))
13149 {
13150 output_operand_lossage ("operand is neither a constant nor a "
13151 "condition code, invalid operand "
13152 "code 'c'");
13153 return;
13154 }
13155 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13156 return;
13157 case 'f':
13158 if (!COMPARISON_P (x))
13159 {
13160 output_operand_lossage ("operand is neither a constant nor a "
13161 "condition code, invalid operand "
13162 "code 'f'");
13163 return;
13164 }
13165 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13166 if (ASSEMBLER_DIALECT == ASM_ATT)
13167 putc ('.', file);
13168 #endif
13169 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13170 return;
13171
13172 case 'H':
13173 /* It doesn't actually matter what mode we use here, as we're
13174 only going to use this for printing. */
13175 x = adjust_address_nv (x, DImode, 8);
13176 break;
13177
13178 case '+':
13179 {
13180 rtx x;
13181
13182 if (!optimize
13183 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13184 return;
13185
13186 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13187 if (x)
13188 {
13189 int pred_val = INTVAL (XEXP (x, 0));
13190
13191 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13192 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13193 {
13194 int taken = pred_val > REG_BR_PROB_BASE / 2;
13195 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13196
13197 /* Emit hints only when the default branch prediction
13198 heuristics would fail. */
13199 if (taken != cputaken)
13200 {
13201 /* We use 3e (DS) prefix for taken branches and
13202 2e (CS) prefix for not taken branches. */
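/* For instance, a forward conditional branch that profile data says is
   usually taken contradicts the CPU's static forward-not-taken guess, so it
   gets "ds ; " prepended here; the opposite mismatch gets "cs ; ".  */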
13203 if (taken)
13204 fputs ("ds ; ", file);
13205 else
13206 fputs ("cs ; ", file);
13207 }
13208 }
13209 }
13210 return;
13211 }
13212
13213 case 'Y':
13214 switch (GET_CODE (x))
13215 {
13216 case NE:
13217 fputs ("neq", file);
13218 break;
13219 case EQ:
13220 fputs ("eq", file);
13221 break;
13222 case GE:
13223 case GEU:
13224 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13225 break;
13226 case GT:
13227 case GTU:
13228 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13229 break;
13230 case LE:
13231 case LEU:
13232 fputs ("le", file);
13233 break;
13234 case LT:
13235 case LTU:
13236 fputs ("lt", file);
13237 break;
13238 case UNORDERED:
13239 fputs ("unord", file);
13240 break;
13241 case ORDERED:
13242 fputs ("ord", file);
13243 break;
13244 case UNEQ:
13245 fputs ("ueq", file);
13246 break;
13247 case UNGE:
13248 fputs ("nlt", file);
13249 break;
13250 case UNGT:
13251 fputs ("nle", file);
13252 break;
13253 case UNLE:
13254 fputs ("ule", file);
13255 break;
13256 case UNLT:
13257 fputs ("ult", file);
13258 break;
13259 case LTGT:
13260 fputs ("une", file);
13261 break;
13262 default:
13263 output_operand_lossage ("operand is not a condition code, "
13264 "invalid operand code 'Y'");
13265 return;
13266 }
13267 return;
13268
13269 case ';':
13270 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13271 putc (';', file);
13272 #endif
13273 return;
13274
13275 case '@':
13276 if (ASSEMBLER_DIALECT == ASM_ATT)
13277 putc ('%', file);
13278
13279 /* The kernel uses a different segment register for performance
13280 reasons; a system call would not have to trash the userspace
13281 segment register, which would be expensive. */
13282 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13283 fputs ("fs", file);
13284 else
13285 fputs ("gs", file);
13286 return;
13287
13288 default:
13289 output_operand_lossage ("invalid operand code '%c'", code);
13290 }
13291 }
13292
13293 if (REG_P (x))
13294 print_reg (x, code, file);
13295
13296 else if (MEM_P (x))
13297 {
13298 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13299 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13300 && GET_MODE (x) != BLKmode)
13301 {
13302 const char * size;
13303 switch (GET_MODE_SIZE (GET_MODE (x)))
13304 {
13305 case 1: size = "BYTE"; break;
13306 case 2: size = "WORD"; break;
13307 case 4: size = "DWORD"; break;
13308 case 8: size = "QWORD"; break;
13309 case 12: size = "TBYTE"; break;
13310 case 16:
13311 if (GET_MODE (x) == XFmode)
13312 size = "TBYTE";
13313 else
13314 size = "XMMWORD";
13315 break;
13316 case 32: size = "YMMWORD"; break;
13317 default:
13318 gcc_unreachable ();
13319 }
13320
13321 /* Check for explicit size override (codes 'b', 'w' and 'k') */
13322 if (code == 'b')
13323 size = "BYTE";
13324 else if (code == 'w')
13325 size = "WORD";
13326 else if (code == 'k')
13327 size = "DWORD";
13328
13329 fputs (size, file);
13330 fputs (" PTR ", file);
13331 }
13332
13333 x = XEXP (x, 0);
13334 /* Avoid (%rip) for call operands. */
13335 if (CONSTANT_ADDRESS_P (x) && code == 'P'
13336 && !CONST_INT_P (x))
13337 output_addr_const (file, x);
13338 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
13339 output_operand_lossage ("invalid constraints for operand");
13340 else
13341 output_address (x);
13342 }
13343
13344 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
13345 {
13346 REAL_VALUE_TYPE r;
13347 long l;
13348
13349 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13350 REAL_VALUE_TO_TARGET_SINGLE (r, l);
13351
13352 if (ASSEMBLER_DIALECT == ASM_ATT)
13353 putc ('$', file);
13354 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13355 if (code == 'q')
13356 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
13357 else
13358 fprintf (file, "0x%08x", (unsigned int) l);
13359 }
13360
13361 /* These float cases don't actually occur as immediate operands. */
13362 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
13363 {
13364 char dstr[30];
13365
13366 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13367 fputs (dstr, file);
13368 }
13369
13370 else if (GET_CODE (x) == CONST_DOUBLE
13371 && GET_MODE (x) == XFmode)
13372 {
13373 char dstr[30];
13374
13375 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13376 fputs (dstr, file);
13377 }
13378
13379 else
13380 {
13381 /* We have patterns that allow zero sets of memory, for instance.
13382 In 64-bit mode, we should probably support all 8-byte vectors,
13383 since we can in fact encode that into an immediate. */
13384 if (GET_CODE (x) == CONST_VECTOR)
13385 {
13386 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
13387 x = const0_rtx;
13388 }
13389
13390 if (code != 'P')
13391 {
13392 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
13393 {
13394 if (ASSEMBLER_DIALECT == ASM_ATT)
13395 putc ('$', file);
13396 }
13397 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13398 || GET_CODE (x) == LABEL_REF)
13399 {
13400 if (ASSEMBLER_DIALECT == ASM_ATT)
13401 putc ('$', file);
13402 else
13403 fputs ("OFFSET FLAT:", file);
13404 }
13405 }
13406 if (CONST_INT_P (x))
13407 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13408 else if (flag_pic)
13409 output_pic_addr_const (file, x, code);
13410 else
13411 output_addr_const (file, x);
13412 }
13413 }
13414
13415 static bool
13416 ix86_print_operand_punct_valid_p (unsigned char code)
13417 {
13418 return (code == '@' || code == '*' || code == '+'
13419 || code == '&' || code == ';');
13420 }
13421 \f
13422 /* Print a memory operand whose address is ADDR. */
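/* A sketch of the two dialects (hypothetical operands): an address with base
   %ebx, index %ecx, scale 4 and displacement "foo" prints as
   "foo(%ebx,%ecx,4)" in AT&T syntax and roughly as "foo[ebx+ecx*4]" in
   Intel syntax.  */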
13423
13424 static void
13425 ix86_print_operand_address (FILE *file, rtx addr)
13426 {
13427 struct ix86_address parts;
13428 rtx base, index, disp;
13429 int scale;
13430 int ok = ix86_decompose_address (addr, &parts);
13431
13432 gcc_assert (ok);
13433
13434 base = parts.base;
13435 index = parts.index;
13436 disp = parts.disp;
13437 scale = parts.scale;
13438
13439 switch (parts.seg)
13440 {
13441 case SEG_DEFAULT:
13442 break;
13443 case SEG_FS:
13444 case SEG_GS:
13445 if (ASSEMBLER_DIALECT == ASM_ATT)
13446 putc ('%', file);
13447 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
13448 break;
13449 default:
13450 gcc_unreachable ();
13451 }
13452
13453 /* Use one byte shorter RIP relative addressing for 64bit mode. */
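/* I.e. prefer "sym(%rip)" when there is no base and no index register; a
   plain 32-bit absolute displacement would need an extra SIB byte.  */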
13454 if (TARGET_64BIT && !base && !index)
13455 {
13456 rtx symbol = disp;
13457
13458 if (GET_CODE (disp) == CONST
13459 && GET_CODE (XEXP (disp, 0)) == PLUS
13460 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13461 symbol = XEXP (XEXP (disp, 0), 0);
13462
13463 if (GET_CODE (symbol) == LABEL_REF
13464 || (GET_CODE (symbol) == SYMBOL_REF
13465 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13466 base = pc_rtx;
13467 }
13468 if (!base && !index)
13469 {
13470 /* A displacement-only address requires special attention. */
13471
13472 if (CONST_INT_P (disp))
13473 {
13474 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
13475 fputs ("ds:", file);
13476 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13477 }
13478 else if (flag_pic)
13479 output_pic_addr_const (file, disp, 0);
13480 else
13481 output_addr_const (file, disp);
13482 }
13483 else
13484 {
13485 if (ASSEMBLER_DIALECT == ASM_ATT)
13486 {
13487 if (disp)
13488 {
13489 if (flag_pic)
13490 output_pic_addr_const (file, disp, 0);
13491 else if (GET_CODE (disp) == LABEL_REF)
13492 output_asm_label (disp);
13493 else
13494 output_addr_const (file, disp);
13495 }
13496
13497 putc ('(', file);
13498 if (base)
13499 print_reg (base, 0, file);
13500 if (index)
13501 {
13502 putc (',', file);
13503 print_reg (index, 0, file);
13504 if (scale != 1)
13505 fprintf (file, ",%d", scale);
13506 }
13507 putc (')', file);
13508 }
13509 else
13510 {
13511 rtx offset = NULL_RTX;
13512
13513 if (disp)
13514 {
13515 /* Pull out the offset of a symbol; print any symbol itself. */
13516 if (GET_CODE (disp) == CONST
13517 && GET_CODE (XEXP (disp, 0)) == PLUS
13518 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13519 {
13520 offset = XEXP (XEXP (disp, 0), 1);
13521 disp = gen_rtx_CONST (VOIDmode,
13522 XEXP (XEXP (disp, 0), 0));
13523 }
13524
13525 if (flag_pic)
13526 output_pic_addr_const (file, disp, 0);
13527 else if (GET_CODE (disp) == LABEL_REF)
13528 output_asm_label (disp);
13529 else if (CONST_INT_P (disp))
13530 offset = disp;
13531 else
13532 output_addr_const (file, disp);
13533 }
13534
13535 putc ('[', file);
13536 if (base)
13537 {
13538 print_reg (base, 0, file);
13539 if (offset)
13540 {
13541 if (INTVAL (offset) >= 0)
13542 putc ('+', file);
13543 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13544 }
13545 }
13546 else if (offset)
13547 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13548 else
13549 putc ('0', file);
13550
13551 if (index)
13552 {
13553 putc ('+', file);
13554 print_reg (index, 0, file);
13555 if (scale != 1)
13556 fprintf (file, "*%d", scale);
13557 }
13558 putc (']', file);
13559 }
13560 }
13561 }
13562
13563 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13564
13565 static bool
13566 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13567 {
13568 rtx op;
13569
13570 if (GET_CODE (x) != UNSPEC)
13571 return false;
13572
13573 op = XVECEXP (x, 0, 0);
13574 switch (XINT (x, 1))
13575 {
13576 case UNSPEC_GOTTPOFF:
13577 output_addr_const (file, op);
13578 /* FIXME: This might be @TPOFF in Sun ld. */
13579 fputs ("@gottpoff", file);
13580 break;
13581 case UNSPEC_TPOFF:
13582 output_addr_const (file, op);
13583 fputs ("@tpoff", file);
13584 break;
13585 case UNSPEC_NTPOFF:
13586 output_addr_const (file, op);
13587 if (TARGET_64BIT)
13588 fputs ("@tpoff", file);
13589 else
13590 fputs ("@ntpoff", file);
13591 break;
13592 case UNSPEC_DTPOFF:
13593 output_addr_const (file, op);
13594 fputs ("@dtpoff", file);
13595 break;
13596 case UNSPEC_GOTNTPOFF:
13597 output_addr_const (file, op);
13598 if (TARGET_64BIT)
13599 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13600 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13601 else
13602 fputs ("@gotntpoff", file);
13603 break;
13604 case UNSPEC_INDNTPOFF:
13605 output_addr_const (file, op);
13606 fputs ("@indntpoff", file);
13607 break;
13608 #if TARGET_MACHO
13609 case UNSPEC_MACHOPIC_OFFSET:
13610 output_addr_const (file, op);
13611 putc ('-', file);
13612 machopic_output_function_base_name (file);
13613 break;
13614 #endif
13615
13616 case UNSPEC_STACK_CHECK:
13617 {
13618 int offset;
13619
13620 gcc_assert (flag_split_stack);
13621
13622 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
13623 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
13624 #else
13625 gcc_unreachable ();
13626 #endif
13627
13628 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
13629 }
13630 break;
13631
13632 default:
13633 return false;
13634 }
13635
13636 return true;
13637 }
13638 \f
13639 /* Split one or more double-mode RTL references into pairs of half-mode
13640 references. The RTL can be REG, offsettable MEM, integer constant, or
13641 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
13642 split and "num" is its length. lo_half and hi_half are output arrays
13643 that parallel "operands". */
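/* Illustration: a DImode MEM at address A is split into an SImode lo_half at
   A and an SImode hi_half at A + 4; a TImode operand is likewise split into
   two DImode halves 8 bytes apart.  */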
13644
13645 void
13646 split_double_mode (enum machine_mode mode, rtx operands[],
13647 int num, rtx lo_half[], rtx hi_half[])
13648 {
13649 enum machine_mode half_mode;
13650 unsigned int byte;
13651
13652 switch (mode)
13653 {
13654 case TImode:
13655 half_mode = DImode;
13656 break;
13657 case DImode:
13658 half_mode = SImode;
13659 break;
13660 default:
13661 gcc_unreachable ();
13662 }
13663
13664 byte = GET_MODE_SIZE (half_mode);
13665
13666 while (num--)
13667 {
13668 rtx op = operands[num];
13669
13670 /* simplify_subreg refuses to split volatile memory addresses,
13671 but we still have to handle them. */
13672 if (MEM_P (op))
13673 {
13674 lo_half[num] = adjust_address (op, half_mode, 0);
13675 hi_half[num] = adjust_address (op, half_mode, byte);
13676 }
13677 else
13678 {
13679 lo_half[num] = simplify_gen_subreg (half_mode, op,
13680 GET_MODE (op) == VOIDmode
13681 ? mode : GET_MODE (op), 0);
13682 hi_half[num] = simplify_gen_subreg (half_mode, op,
13683 GET_MODE (op) == VOIDmode
13684 ? mode : GET_MODE (op), byte);
13685 }
13686 }
13687 }
13688 \f
13689 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13690 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13691 is the expression of the binary operation. The output may either be
13692 emitted here, or returned to the caller, like all output_* functions.
13693
13694 There is no guarantee that the operands are the same mode, as they
13695 might be within FLOAT or FLOAT_EXTEND expressions. */
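/* For example (SSE case, derived from the code below): a scalar SFmode add
   returns "addss\t{%2, %0|%0, %2}", or the three-operand
   "vaddss\t{%2, %1, %0|%0, %1, %2}" when TARGET_AVX is in effect.  */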
13696
13697 #ifndef SYSV386_COMPAT
13698 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13699 wants to fix the assemblers because that causes incompatibility
13700 with gcc. No-one wants to fix gcc because that causes
13701 incompatibility with assemblers... You can use the option of
13702 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13703 #define SYSV386_COMPAT 1
13704 #endif
13705
13706 const char *
13707 output_387_binary_op (rtx insn, rtx *operands)
13708 {
13709 static char buf[40];
13710 const char *p;
13711 const char *ssep;
13712 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13713
13714 #ifdef ENABLE_CHECKING
13715 /* Even if we do not want to check the inputs, this documents the input
13716 constraints, which helps in understanding the following code. */
13717 if (STACK_REG_P (operands[0])
13718 && ((REG_P (operands[1])
13719 && REGNO (operands[0]) == REGNO (operands[1])
13720 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13721 || (REG_P (operands[2])
13722 && REGNO (operands[0]) == REGNO (operands[2])
13723 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13724 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13725 ; /* ok */
13726 else
13727 gcc_assert (is_sse);
13728 #endif
13729
13730 switch (GET_CODE (operands[3]))
13731 {
13732 case PLUS:
13733 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13734 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13735 p = "fiadd";
13736 else
13737 p = "fadd";
13738 ssep = "vadd";
13739 break;
13740
13741 case MINUS:
13742 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13743 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13744 p = "fisub";
13745 else
13746 p = "fsub";
13747 ssep = "vsub";
13748 break;
13749
13750 case MULT:
13751 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13752 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13753 p = "fimul";
13754 else
13755 p = "fmul";
13756 ssep = "vmul";
13757 break;
13758
13759 case DIV:
13760 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13761 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13762 p = "fidiv";
13763 else
13764 p = "fdiv";
13765 ssep = "vdiv";
13766 break;
13767
13768 default:
13769 gcc_unreachable ();
13770 }
13771
13772 if (is_sse)
13773 {
13774 if (TARGET_AVX)
13775 {
13776 strcpy (buf, ssep);
13777 if (GET_MODE (operands[0]) == SFmode)
13778 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13779 else
13780 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13781 }
13782 else
13783 {
13784 strcpy (buf, ssep + 1);
13785 if (GET_MODE (operands[0]) == SFmode)
13786 strcat (buf, "ss\t{%2, %0|%0, %2}");
13787 else
13788 strcat (buf, "sd\t{%2, %0|%0, %2}");
13789 }
13790 return buf;
13791 }
13792 strcpy (buf, p);
13793
13794 switch (GET_CODE (operands[3]))
13795 {
13796 case MULT:
13797 case PLUS:
13798 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13799 {
13800 rtx temp = operands[2];
13801 operands[2] = operands[1];
13802 operands[1] = temp;
13803 }
13804
13805 /* We now know operands[0] == operands[1]. */
13806
13807 if (MEM_P (operands[2]))
13808 {
13809 p = "%Z2\t%2";
13810 break;
13811 }
13812
13813 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13814 {
13815 if (STACK_TOP_P (operands[0]))
13816 /* How is it that we are storing to a dead operand[2]?
13817 Well, presumably operands[1] is dead too. We can't
13818 store the result to st(0) as st(0) gets popped on this
13819 instruction. Instead store to operands[2] (which I
13820 think has to be st(1)). st(1) will be popped later.
13821 gcc <= 2.8.1 didn't have this check and generated
13822 assembly code that the Unixware assembler rejected. */
13823 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13824 else
13825 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13826 break;
13827 }
13828
13829 if (STACK_TOP_P (operands[0]))
13830 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13831 else
13832 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13833 break;
13834
13835 case MINUS:
13836 case DIV:
13837 if (MEM_P (operands[1]))
13838 {
13839 p = "r%Z1\t%1";
13840 break;
13841 }
13842
13843 if (MEM_P (operands[2]))
13844 {
13845 p = "%Z2\t%2";
13846 break;
13847 }
13848
13849 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13850 {
13851 #if SYSV386_COMPAT
13852 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13853 derived assemblers, confusingly reverse the direction of
13854 the operation for fsub{r} and fdiv{r} when the
13855 destination register is not st(0). The Intel assembler
13856 doesn't have this brain damage. Read !SYSV386_COMPAT to
13857 figure out what the hardware really does. */
13858 if (STACK_TOP_P (operands[0]))
13859 p = "{p\t%0, %2|rp\t%2, %0}";
13860 else
13861 p = "{rp\t%2, %0|p\t%0, %2}";
13862 #else
13863 if (STACK_TOP_P (operands[0]))
13864 /* As above for fmul/fadd, we can't store to st(0). */
13865 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13866 else
13867 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13868 #endif
13869 break;
13870 }
13871
13872 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13873 {
13874 #if SYSV386_COMPAT
13875 if (STACK_TOP_P (operands[0]))
13876 p = "{rp\t%0, %1|p\t%1, %0}";
13877 else
13878 p = "{p\t%1, %0|rp\t%0, %1}";
13879 #else
13880 if (STACK_TOP_P (operands[0]))
13881 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13882 else
13883 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13884 #endif
13885 break;
13886 }
13887
13888 if (STACK_TOP_P (operands[0]))
13889 {
13890 if (STACK_TOP_P (operands[1]))
13891 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13892 else
13893 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13894 break;
13895 }
13896 else if (STACK_TOP_P (operands[1]))
13897 {
13898 #if SYSV386_COMPAT
13899 p = "{\t%1, %0|r\t%0, %1}";
13900 #else
13901 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13902 #endif
13903 }
13904 else
13905 {
13906 #if SYSV386_COMPAT
13907 p = "{r\t%2, %0|\t%0, %2}";
13908 #else
13909 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13910 #endif
13911 }
13912 break;
13913
13914 default:
13915 gcc_unreachable ();
13916 }
13917
13918 strcat (buf, p);
13919 return buf;
13920 }
13921
13922 /* Return needed mode for entity in optimize_mode_switching pass. */
13923
13924 int
13925 ix86_mode_needed (int entity, rtx insn)
13926 {
13927 enum attr_i387_cw mode;
13928
13929 /* The mode UNINITIALIZED is used to store the control word after a
13930 function call or ASM pattern. The mode ANY specifies that the function
13931 has no requirements on the control word and makes no changes in the
13932 bits we are interested in. */
13933
13934 if (CALL_P (insn)
13935 || (NONJUMP_INSN_P (insn)
13936 && (asm_noperands (PATTERN (insn)) >= 0
13937 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13938 return I387_CW_UNINITIALIZED;
13939
13940 if (recog_memoized (insn) < 0)
13941 return I387_CW_ANY;
13942
13943 mode = get_attr_i387_cw (insn);
13944
13945 switch (entity)
13946 {
13947 case I387_TRUNC:
13948 if (mode == I387_CW_TRUNC)
13949 return mode;
13950 break;
13951
13952 case I387_FLOOR:
13953 if (mode == I387_CW_FLOOR)
13954 return mode;
13955 break;
13956
13957 case I387_CEIL:
13958 if (mode == I387_CW_CEIL)
13959 return mode;
13960 break;
13961
13962 case I387_MASK_PM:
13963 if (mode == I387_CW_MASK_PM)
13964 return mode;
13965 break;
13966
13967 default:
13968 gcc_unreachable ();
13969 }
13970
13971 return I387_CW_ANY;
13972 }
13973
13974 /* Output code to initialize control word copies used by trunc?f?i and
13975 rounding patterns. CURRENT_MODE is set to the current control word,
13976 while NEW_MODE is set to the new control word. */
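/* Background for the magic constants below: the x87 rounding control field
   occupies bits 10-11 of the control word (00 = to nearest, 01 = down,
   10 = up, 11 = truncate), hence the 0x0400, 0x0800 and 0x0c00 values;
   bit 5 (0x0020) is the precision exception mask used for nearbyint.  */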
13977
13978 void
13979 emit_i387_cw_initialization (int mode)
13980 {
13981 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13982 rtx new_mode;
13983
13984 enum ix86_stack_slot slot;
13985
13986 rtx reg = gen_reg_rtx (HImode);
13987
13988 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13989 emit_move_insn (reg, copy_rtx (stored_mode));
13990
13991 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
13992 || optimize_function_for_size_p (cfun))
13993 {
13994 switch (mode)
13995 {
13996 case I387_CW_TRUNC:
13997 /* round toward zero (truncate) */
13998 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13999 slot = SLOT_CW_TRUNC;
14000 break;
14001
14002 case I387_CW_FLOOR:
14003 /* round down toward -oo */
14004 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14005 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14006 slot = SLOT_CW_FLOOR;
14007 break;
14008
14009 case I387_CW_CEIL:
14010 /* round up toward +oo */
14011 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14012 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14013 slot = SLOT_CW_CEIL;
14014 break;
14015
14016 case I387_CW_MASK_PM:
14017 /* mask precision exception for nearbyint() */
14018 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14019 slot = SLOT_CW_MASK_PM;
14020 break;
14021
14022 default:
14023 gcc_unreachable ();
14024 }
14025 }
14026 else
14027 {
14028 switch (mode)
14029 {
14030 case I387_CW_TRUNC:
14031 /* round toward zero (truncate) */
14032 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14033 slot = SLOT_CW_TRUNC;
14034 break;
14035
14036 case I387_CW_FLOOR:
14037 /* round down toward -oo */
14038 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14039 slot = SLOT_CW_FLOOR;
14040 break;
14041
14042 case I387_CW_CEIL:
14043 /* round up toward +oo */
14044 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14045 slot = SLOT_CW_CEIL;
14046 break;
14047
14048 case I387_CW_MASK_PM:
14049 /* mask precision exception for nearbyint() */
14050 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14051 slot = SLOT_CW_MASK_PM;
14052 break;
14053
14054 default:
14055 gcc_unreachable ();
14056 }
14057 }
14058
14059 gcc_assert (slot < MAX_386_STACK_LOCALS);
14060
14061 new_mode = assign_386_stack_local (HImode, slot);
14062 emit_move_insn (new_mode, reg);
14063 }
14064
14065 /* Output code for INSN to convert a float to a signed int. OPERANDS
14066 are the insn operands. The output may be [HSD]Imode and the input
14067 operand may be [SDX]Fmode. */
14068
14069 const char *
14070 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
14071 {
14072 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14073 int dimode_p = GET_MODE (operands[0]) == DImode;
14074 int round_mode = get_attr_i387_cw (insn);
14075
14076 /* Jump through a hoop or two for DImode, since the hardware has no
14077 non-popping instruction. We used to do this a different way, but
14078 that was somewhat fragile and broke with post-reload splitters. */
14079 if ((dimode_p || fisttp) && !stack_top_dies)
14080 output_asm_insn ("fld\t%y1", operands);
14081
14082 gcc_assert (STACK_TOP_P (operands[1]));
14083 gcc_assert (MEM_P (operands[0]));
14084 gcc_assert (GET_MODE (operands[1]) != TFmode);
14085
14086 if (fisttp)
14087 output_asm_insn ("fisttp%Z0\t%0", operands);
14088 else
14089 {
14090 if (round_mode != I387_CW_ANY)
14091 output_asm_insn ("fldcw\t%3", operands);
14092 if (stack_top_dies || dimode_p)
14093 output_asm_insn ("fistp%Z0\t%0", operands);
14094 else
14095 output_asm_insn ("fist%Z0\t%0", operands);
14096 if (round_mode != I387_CW_ANY)
14097 output_asm_insn ("fldcw\t%2", operands);
14098 }
14099
14100 return "";
14101 }
14102
14103 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14104 have the values zero or one, indicates the ffreep insn's operand
14105 from the OPERANDS array. */
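/* Sketch of the fallback below: when the assembler lacks the ffreep mnemonic,
   the insn is emitted as raw data, e.g. ".word 0xc1df" for %st(1) (assuming
   ASM_SHORT is ".word"), whose little-endian bytes 0xdf 0xc1 encode
   "ffreep %st(1)".  */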
14106
14107 static const char *
14108 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14109 {
14110 if (TARGET_USE_FFREEP)
14111 #ifdef HAVE_AS_IX86_FFREEP
14112 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14113 #else
14114 {
14115 static char retval[32];
14116 int regno = REGNO (operands[opno]);
14117
14118 gcc_assert (FP_REGNO_P (regno));
14119
14120 regno -= FIRST_STACK_REG;
14121
14122 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14123 return retval;
14124 }
14125 #endif
14126
14127 return opno ? "fstp\t%y1" : "fstp\t%y0";
14128 }
14129
14130
14131 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14132 should be used. UNORDERED_P is true when fucom should be used. */
14133
14134 const char *
14135 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
14136 {
14137 int stack_top_dies;
14138 rtx cmp_op0, cmp_op1;
14139 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14140
14141 if (eflags_p)
14142 {
14143 cmp_op0 = operands[0];
14144 cmp_op1 = operands[1];
14145 }
14146 else
14147 {
14148 cmp_op0 = operands[1];
14149 cmp_op1 = operands[2];
14150 }
14151
14152 if (is_sse)
14153 {
14154 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14155 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14156 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14157 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14158
14159 if (GET_MODE (operands[0]) == SFmode)
14160 if (unordered_p)
14161 return &ucomiss[TARGET_AVX ? 0 : 1];
14162 else
14163 return &comiss[TARGET_AVX ? 0 : 1];
14164 else
14165 if (unordered_p)
14166 return &ucomisd[TARGET_AVX ? 0 : 1];
14167 else
14168 return &comisd[TARGET_AVX ? 0 : 1];
14169 }
14170
14171 gcc_assert (STACK_TOP_P (cmp_op0));
14172
14173 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14174
14175 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14176 {
14177 if (stack_top_dies)
14178 {
14179 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14180 return output_387_ffreep (operands, 1);
14181 }
14182 else
14183 return "ftst\n\tfnstsw\t%0";
14184 }
14185
14186 if (STACK_REG_P (cmp_op1)
14187 && stack_top_dies
14188 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14189 && REGNO (cmp_op1) != FIRST_STACK_REG)
14190 {
14191 /* If the top of the 387 stack dies, and the other operand
14192 is also a stack register that dies, then this must be a
14193 `fcompp' float compare. */
14194
14195 if (eflags_p)
14196 {
14197 /* There is no double popping fcomi variant. Fortunately,
14198 eflags is immune from the fstp's cc clobbering. */
14199 if (unordered_p)
14200 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14201 else
14202 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14203 return output_387_ffreep (operands, 0);
14204 }
14205 else
14206 {
14207 if (unordered_p)
14208 return "fucompp\n\tfnstsw\t%0";
14209 else
14210 return "fcompp\n\tfnstsw\t%0";
14211 }
14212 }
14213 else
14214 {
14215 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
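/* Worked example: an ordered fcomi-style compare of two FP stack registers
   where the top does not die has eflags_p = 1 and the other bits 0, giving
   mask 8 and selecting "fcomi\t{%y1, %0|%0, %y1}" from the table below.  */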
14216
14217 static const char * const alt[16] =
14218 {
14219 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14220 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14221 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14222 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14223
14224 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14225 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14226 NULL,
14227 NULL,
14228
14229 "fcomi\t{%y1, %0|%0, %y1}",
14230 "fcomip\t{%y1, %0|%0, %y1}",
14231 "fucomi\t{%y1, %0|%0, %y1}",
14232 "fucomip\t{%y1, %0|%0, %y1}",
14233
14234 NULL,
14235 NULL,
14236 NULL,
14237 NULL
14238 };
14239
14240 int mask;
14241 const char *ret;
14242
14243 mask = eflags_p << 3;
14244 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14245 mask |= unordered_p << 1;
14246 mask |= stack_top_dies;
14247
14248 gcc_assert (mask < 16);
14249 ret = alt[mask];
14250 gcc_assert (ret);
14251
14252 return ret;
14253 }
14254 }
14255
14256 void
14257 ix86_output_addr_vec_elt (FILE *file, int value)
14258 {
14259 const char *directive = ASM_LONG;
14260
14261 #ifdef ASM_QUAD
14262 if (TARGET_64BIT)
14263 directive = ASM_QUAD;
14264 #else
14265 gcc_assert (!TARGET_64BIT);
14266 #endif
14267
14268 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14269 }
14270
14271 void
14272 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14273 {
14274 const char *directive = ASM_LONG;
14275
14276 #ifdef ASM_QUAD
14277 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14278 directive = ASM_QUAD;
14279 #else
14280 gcc_assert (!TARGET_64BIT);
14281 #endif
14282 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14283 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14284 fprintf (file, "%s%s%d-%s%d\n",
14285 directive, LPREFIX, value, LPREFIX, rel);
14286 else if (HAVE_AS_GOTOFF_IN_DATA)
14287 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14288 #if TARGET_MACHO
14289 else if (TARGET_MACHO)
14290 {
14291 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14292 machopic_output_function_base_name (file);
14293 putc ('\n', file);
14294 }
14295 #endif
14296 else
14297 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14298 GOT_SYMBOL_NAME, LPREFIX, value);
14299 }
14300 \f
14301 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
14302 for the target. */
14303
14304 void
14305 ix86_expand_clear (rtx dest)
14306 {
14307 rtx tmp;
14308
14309 /* We play register width games, which are only valid after reload. */
14310 gcc_assert (reload_completed);
14311
14312 /* Avoid HImode and its attendant prefix byte. */
14313 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
14314 dest = gen_rtx_REG (SImode, REGNO (dest));
14315 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
14316
14317 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
14318 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
14319 {
14320 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14321 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
14322 }
14323
14324 emit_insn (tmp);
14325 }
14326
14327 /* X is an unchanging MEM. If it is a constant pool reference, return
14328 the constant pool rtx, else NULL. */
14329
14330 rtx
14331 maybe_get_pool_constant (rtx x)
14332 {
14333 x = ix86_delegitimize_address (XEXP (x, 0));
14334
14335 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
14336 return get_pool_constant (x);
14337
14338 return NULL_RTX;
14339 }
14340
14341 void
14342 ix86_expand_move (enum machine_mode mode, rtx operands[])
14343 {
14344 rtx op0, op1;
14345 enum tls_model model;
14346
14347 op0 = operands[0];
14348 op1 = operands[1];
14349
14350 if (GET_CODE (op1) == SYMBOL_REF)
14351 {
14352 model = SYMBOL_REF_TLS_MODEL (op1);
14353 if (model)
14354 {
14355 op1 = legitimize_tls_address (op1, model, true);
14356 op1 = force_operand (op1, op0);
14357 if (op1 == op0)
14358 return;
14359 }
14360 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14361 && SYMBOL_REF_DLLIMPORT_P (op1))
14362 op1 = legitimize_dllimport_symbol (op1, false);
14363 }
14364 else if (GET_CODE (op1) == CONST
14365 && GET_CODE (XEXP (op1, 0)) == PLUS
14366 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
14367 {
14368 rtx addend = XEXP (XEXP (op1, 0), 1);
14369 rtx symbol = XEXP (XEXP (op1, 0), 0);
14370 rtx tmp = NULL;
14371
14372 model = SYMBOL_REF_TLS_MODEL (symbol);
14373 if (model)
14374 tmp = legitimize_tls_address (symbol, model, true);
14375 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14376 && SYMBOL_REF_DLLIMPORT_P (symbol))
14377 tmp = legitimize_dllimport_symbol (symbol, true);
14378
14379 if (tmp)
14380 {
14381 tmp = force_operand (tmp, NULL);
14382 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
14383 op0, 1, OPTAB_DIRECT);
14384 if (tmp == op0)
14385 return;
14386 }
14387 }
14388
14389 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
14390 {
14391 if (TARGET_MACHO && !TARGET_64BIT)
14392 {
14393 #if TARGET_MACHO
14394 if (MACHOPIC_PURE)
14395 {
14396 rtx temp = ((reload_in_progress
14397 || ((op0 && REG_P (op0))
14398 && mode == Pmode))
14399 ? op0 : gen_reg_rtx (Pmode));
14400 op1 = machopic_indirect_data_reference (op1, temp);
14401 op1 = machopic_legitimize_pic_address (op1, mode,
14402 temp == op1 ? 0 : temp);
14403 }
14404 else if (MACHOPIC_INDIRECT)
14405 op1 = machopic_indirect_data_reference (op1, 0);
14406 if (op0 == op1)
14407 return;
14408 #endif
14409 }
14410 else
14411 {
14412 if (MEM_P (op0))
14413 op1 = force_reg (Pmode, op1);
14414 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
14415 {
14416 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
14417 op1 = legitimize_pic_address (op1, reg);
14418 if (op0 == op1)
14419 return;
14420 }
14421 }
14422 }
14423 else
14424 {
14425 if (MEM_P (op0)
14426 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
14427 || !push_operand (op0, mode))
14428 && MEM_P (op1))
14429 op1 = force_reg (mode, op1);
14430
14431 if (push_operand (op0, mode)
14432 && ! general_no_elim_operand (op1, mode))
14433 op1 = copy_to_mode_reg (mode, op1);
14434
14435 /* Force large constants in 64-bit compilation into a register
14436 to get them CSEed. */
14437 if (can_create_pseudo_p ()
14438 && (mode == DImode) && TARGET_64BIT
14439 && immediate_operand (op1, mode)
14440 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14441 && !register_operand (op0, mode)
14442 && optimize)
14443 op1 = copy_to_mode_reg (mode, op1);
14444
14445 if (can_create_pseudo_p ()
14446 && FLOAT_MODE_P (mode)
14447 && GET_CODE (op1) == CONST_DOUBLE)
14448 {
14449 /* If we are loading a floating point constant into a register,
14450 force the value to memory now, since we'll get better code
14451 out of the back end. */
14452
14453 op1 = validize_mem (force_const_mem (mode, op1));
14454 if (!register_operand (op0, mode))
14455 {
14456 rtx temp = gen_reg_rtx (mode);
14457 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
14458 emit_move_insn (op0, temp);
14459 return;
14460 }
14461 }
14462 }
14463
14464 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14465 }
14466
14467 void
14468 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
14469 {
14470 rtx op0 = operands[0], op1 = operands[1];
14471 unsigned int align = GET_MODE_ALIGNMENT (mode);
14472
14473 /* Force constants other than zero into memory. We do not know how
14474 the instructions used to build constants modify the upper 64 bits
14475 of the register; once we have that information we may be able
14476 to handle some of them more efficiently. */
14477 if (can_create_pseudo_p ()
14478 && register_operand (op0, mode)
14479 && (CONSTANT_P (op1)
14480 || (GET_CODE (op1) == SUBREG
14481 && CONSTANT_P (SUBREG_REG (op1))))
14482 && !standard_sse_constant_p (op1))
14483 op1 = validize_mem (force_const_mem (mode, op1));
14484
14485 /* We need to check memory alignment for SSE mode since attributes
14486 can make operands unaligned. */
14487 if (can_create_pseudo_p ()
14488 && SSE_REG_MODE_P (mode)
14489 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
14490 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
14491 {
14492 rtx tmp[2];
14493
14494 /* ix86_expand_vector_move_misalign() does not like constants ... */
14495 if (CONSTANT_P (op1)
14496 || (GET_CODE (op1) == SUBREG
14497 && CONSTANT_P (SUBREG_REG (op1))))
14498 op1 = validize_mem (force_const_mem (mode, op1));
14499
14500 /* ... nor both arguments in memory. */
14501 if (!register_operand (op0, mode)
14502 && !register_operand (op1, mode))
14503 op1 = force_reg (mode, op1);
14504
14505 tmp[0] = op0; tmp[1] = op1;
14506 ix86_expand_vector_move_misalign (mode, tmp);
14507 return;
14508 }
14509
14510 /* Make operand1 a register if it isn't already. */
14511 if (can_create_pseudo_p ()
14512 && !register_operand (op0, mode)
14513 && !register_operand (op1, mode))
14514 {
14515 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
14516 return;
14517 }
14518
14519 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14520 }
14521
14522 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
14523 straight to ix86_expand_vector_move. */
14524 /* Code generation for scalar reg-reg moves of single and double precision data:
14525 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
14526 movaps reg, reg
14527 else
14528 movss reg, reg
14529 if (x86_sse_partial_reg_dependency == true)
14530 movapd reg, reg
14531 else
14532 movsd reg, reg
14533
14534 Code generation for scalar loads of double precision data:
14535 if (x86_sse_split_regs == true)
14536 movlpd mem, reg (gas syntax)
14537 else
14538 movsd mem, reg
14539
14540 Code generation for unaligned packed loads of single precision data
14541 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
14542 if (x86_sse_unaligned_move_optimal)
14543 movups mem, reg
14544
14545 if (x86_sse_partial_reg_dependency == true)
14546 {
14547 xorps reg, reg
14548 movlps mem, reg
14549 movhps mem+8, reg
14550 }
14551 else
14552 {
14553 movlps mem, reg
14554 movhps mem+8, reg
14555 }
14556
14557 Code generation for unaligned packed loads of double precision data
14558 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
14559 if (x86_sse_unaligned_move_optimal)
14560 movupd mem, reg
14561
14562 if (x86_sse_split_regs == true)
14563 {
14564 movlpd mem, reg
14565 movhpd mem+8, reg
14566 }
14567 else
14568 {
14569 movsd mem, reg
14570 movhpd mem+8, reg
14571 }
14572 */
14573
14574 void
14575 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
14576 {
14577 rtx op0, op1, m;
14578
14579 op0 = operands[0];
14580 op1 = operands[1];
14581
14582 if (TARGET_AVX)
14583 {
14584 switch (GET_MODE_CLASS (mode))
14585 {
14586 case MODE_VECTOR_INT:
14587 case MODE_INT:
14588 switch (GET_MODE_SIZE (mode))
14589 {
14590 case 16:
14591 /* If we're optimizing for size, movups is the smallest. */
14592 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14593 {
14594 op0 = gen_lowpart (V4SFmode, op0);
14595 op1 = gen_lowpart (V4SFmode, op1);
14596 emit_insn (gen_avx_movups (op0, op1));
14597 return;
14598 }
14599 op0 = gen_lowpart (V16QImode, op0);
14600 op1 = gen_lowpart (V16QImode, op1);
14601 emit_insn (gen_avx_movdqu (op0, op1));
14602 break;
14603 case 32:
14604 op0 = gen_lowpart (V32QImode, op0);
14605 op1 = gen_lowpart (V32QImode, op1);
14606 emit_insn (gen_avx_movdqu256 (op0, op1));
14607 break;
14608 default:
14609 gcc_unreachable ();
14610 }
14611 break;
14612 case MODE_VECTOR_FLOAT:
14613 op0 = gen_lowpart (mode, op0);
14614 op1 = gen_lowpart (mode, op1);
14615
14616 switch (mode)
14617 {
14618 case V4SFmode:
14619 emit_insn (gen_avx_movups (op0, op1));
14620 break;
14621 case V8SFmode:
14622 emit_insn (gen_avx_movups256 (op0, op1));
14623 break;
14624 case V2DFmode:
14625 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14626 {
14627 op0 = gen_lowpart (V4SFmode, op0);
14628 op1 = gen_lowpart (V4SFmode, op1);
14629 emit_insn (gen_avx_movups (op0, op1));
14630 return;
14631 }
14632 emit_insn (gen_avx_movupd (op0, op1));
14633 break;
14634 case V4DFmode:
14635 emit_insn (gen_avx_movupd256 (op0, op1));
14636 break;
14637 default:
14638 gcc_unreachable ();
14639 }
14640 break;
14641
14642 default:
14643 gcc_unreachable ();
14644 }
14645
14646 return;
14647 }
14648
14649 if (MEM_P (op1))
14650 {
14651 /* If we're optimizing for size, movups is the smallest. */
14652 if (optimize_insn_for_size_p ()
14653 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14654 {
14655 op0 = gen_lowpart (V4SFmode, op0);
14656 op1 = gen_lowpart (V4SFmode, op1);
14657 emit_insn (gen_sse_movups (op0, op1));
14658 return;
14659 }
14660
14661 /* ??? If we have typed data, then it would appear that using
14662 movdqu is the only way to get unaligned data loaded with
14663 integer type. */
14664 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14665 {
14666 op0 = gen_lowpart (V16QImode, op0);
14667 op1 = gen_lowpart (V16QImode, op1);
14668 emit_insn (gen_sse2_movdqu (op0, op1));
14669 return;
14670 }
14671
14672 if (TARGET_SSE2 && mode == V2DFmode)
14673 {
14674 rtx zero;
14675
14676 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14677 {
14678 op0 = gen_lowpart (V2DFmode, op0);
14679 op1 = gen_lowpart (V2DFmode, op1);
14680 emit_insn (gen_sse2_movupd (op0, op1));
14681 return;
14682 }
14683
14684 /* When SSE registers are split into halves, we can avoid
14685 writing to the top half twice. */
14686 if (TARGET_SSE_SPLIT_REGS)
14687 {
14688 emit_clobber (op0);
14689 zero = op0;
14690 }
14691 else
14692 {
14693 /* ??? Not sure about the best option for the Intel chips.
14694 The following would seem to satisfy; the register is
14695 entirely cleared, breaking the dependency chain. We
14696 then store to the upper half, with a dependency depth
14697 of one. A rumor has it that Intel recommends two movsd
14698 followed by an unpacklpd, but this is unconfirmed. And
14699 given that the dependency depth of the unpacklpd would
14700 still be one, I'm not sure why this would be better. */
14701 zero = CONST0_RTX (V2DFmode);
14702 }
14703
14704 m = adjust_address (op1, DFmode, 0);
14705 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14706 m = adjust_address (op1, DFmode, 8);
14707 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14708 }
14709 else
14710 {
14711 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14712 {
14713 op0 = gen_lowpart (V4SFmode, op0);
14714 op1 = gen_lowpart (V4SFmode, op1);
14715 emit_insn (gen_sse_movups (op0, op1));
14716 return;
14717 }
14718
14719 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14720 emit_move_insn (op0, CONST0_RTX (mode));
14721 else
14722 emit_clobber (op0);
14723
14724 if (mode != V4SFmode)
14725 op0 = gen_lowpart (V4SFmode, op0);
14726 m = adjust_address (op1, V2SFmode, 0);
14727 emit_insn (gen_sse_loadlps (op0, op0, m));
14728 m = adjust_address (op1, V2SFmode, 8);
14729 emit_insn (gen_sse_loadhps (op0, op0, m));
14730 }
14731 }
14732 else if (MEM_P (op0))
14733 {
14734 /* If we're optimizing for size, movups is the smallest. */
14735 if (optimize_insn_for_size_p ()
14736 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14737 {
14738 op0 = gen_lowpart (V4SFmode, op0);
14739 op1 = gen_lowpart (V4SFmode, op1);
14740 emit_insn (gen_sse_movups (op0, op1));
14741 return;
14742 }
14743
14744 /* ??? Similar to above, only less clear because of quote
14745 typeless stores unquote. */
14746 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14747 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14748 {
14749 op0 = gen_lowpart (V16QImode, op0);
14750 op1 = gen_lowpart (V16QImode, op1);
14751 emit_insn (gen_sse2_movdqu (op0, op1));
14752 return;
14753 }
14754
14755 if (TARGET_SSE2 && mode == V2DFmode)
14756 {
14757 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14758 {
14759 op0 = gen_lowpart (V2DFmode, op0);
14760 op1 = gen_lowpart (V2DFmode, op1);
14761 emit_insn (gen_sse2_movupd (op0, op1));
14762 }
14763 else
14764 {
14765 m = adjust_address (op0, DFmode, 0);
14766 emit_insn (gen_sse2_storelpd (m, op1));
14767 m = adjust_address (op0, DFmode, 8);
14768 emit_insn (gen_sse2_storehpd (m, op1));
14769 }
14770 }
14771 else
14772 {
14773 if (mode != V4SFmode)
14774 op1 = gen_lowpart (V4SFmode, op1);
14775
14776 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14777 {
14778 op0 = gen_lowpart (V4SFmode, op0);
14779 emit_insn (gen_sse_movups (op0, op1));
14780 }
14781 else
14782 {
14783 m = adjust_address (op0, V2SFmode, 0);
14784 emit_insn (gen_sse_storelps (m, op1));
14785 m = adjust_address (op0, V2SFmode, 8);
14786 emit_insn (gen_sse_storehps (m, op1));
14787 }
14788 }
14789 }
14790 else
14791 gcc_unreachable ();
14792 }
14793
14794 /* Expand a push in MODE. This is some mode for which we do not support
14795 proper push instructions, at least from the registers that we expect
14796 the value to live in. */
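/* Roughly, this emits a stack-pointer adjustment followed by an
   ordinary store, e.g. "sub $8, %esp; movsd %xmm0, (%esp)" for a
   DFmode value living in an SSE register; the exact instructions
   depend on the mode and on where X lives.  */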
14797
14798 void
14799 ix86_expand_push (enum machine_mode mode, rtx x)
14800 {
14801 rtx tmp;
14802
14803 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14804 GEN_INT (-GET_MODE_SIZE (mode)),
14805 stack_pointer_rtx, 1, OPTAB_DIRECT);
14806 if (tmp != stack_pointer_rtx)
14807 emit_move_insn (stack_pointer_rtx, tmp);
14808
14809 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
14810
14811 /* When we push an operand onto the stack, it has to be aligned at least
14812 at the function argument boundary. However, since we don't have
14813 the argument type, we can't determine the actual argument
14814 boundary. */
14815 emit_move_insn (tmp, x);
14816 }
14817
14818 /* Helper function of ix86_fixup_binary_operands to canonicalize
14819 operand order. Returns true if the operands should be swapped. */
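/* For instance, for dst = src + dst it is better to swap the sources
   so that src1 matches dst; the two-address form of the instruction
   can then be used without an extra register copy.  */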
14820
14821 static bool
14822 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14823 rtx operands[])
14824 {
14825 rtx dst = operands[0];
14826 rtx src1 = operands[1];
14827 rtx src2 = operands[2];
14828
14829 /* If the operation is not commutative, we can't do anything. */
14830 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14831 return false;
14832
14833 /* Highest priority is that src1 should match dst. */
14834 if (rtx_equal_p (dst, src1))
14835 return false;
14836 if (rtx_equal_p (dst, src2))
14837 return true;
14838
14839 /* Next highest priority is that immediate constants come second. */
14840 if (immediate_operand (src2, mode))
14841 return false;
14842 if (immediate_operand (src1, mode))
14843 return true;
14844
14845 /* Lowest priority is that memory references should come second. */
14846 if (MEM_P (src2))
14847 return false;
14848 if (MEM_P (src1))
14849 return true;
14850
14851 return false;
14852 }
14853
14854
14855 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14856 destination to use for the operation. If different from the true
14857 destination in operands[0], a copy operation will be required. */
14858
14859 rtx
14860 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14861 rtx operands[])
14862 {
14863 rtx dst = operands[0];
14864 rtx src1 = operands[1];
14865 rtx src2 = operands[2];
14866
14867 /* Canonicalize operand order. */
14868 if (ix86_swap_binary_operands_p (code, mode, operands))
14869 {
14870 rtx temp;
14871
14872 /* It is invalid to swap operands of different modes. */
14873 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14874
14875 temp = src1;
14876 src1 = src2;
14877 src2 = temp;
14878 }
14879
14880 /* Both source operands cannot be in memory. */
14881 if (MEM_P (src1) && MEM_P (src2))
14882 {
14883 /* Optimization: Only read from memory once. */
14884 if (rtx_equal_p (src1, src2))
14885 {
14886 src2 = force_reg (mode, src2);
14887 src1 = src2;
14888 }
14889 else
14890 src2 = force_reg (mode, src2);
14891 }
14892
14893 /* If the destination is memory, and we do not have matching source
14894 operands, do things in registers. */
14895 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14896 dst = gen_reg_rtx (mode);
14897
14898 /* Source 1 cannot be a constant. */
14899 if (CONSTANT_P (src1))
14900 src1 = force_reg (mode, src1);
14901
14902 /* Source 1 cannot be a non-matching memory. */
14903 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14904 src1 = force_reg (mode, src1);
14905
14906 operands[1] = src1;
14907 operands[2] = src2;
14908 return dst;
14909 }
14910
14911 /* Similarly, but assume that the destination has already been
14912 set up properly. */
14913
14914 void
14915 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14916 enum machine_mode mode, rtx operands[])
14917 {
14918 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14919 gcc_assert (dst == operands[0]);
14920 }
14921
14922 /* Attempt to expand a binary operator. Make the expansion closer to the
14923 actual machine than just general_operand, which will allow 3 separate
14924 memory references (one output, two inputs) in a single insn. */
14925
14926 void
14927 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14928 rtx operands[])
14929 {
14930 rtx src1, src2, dst, op, clob;
14931
14932 dst = ix86_fixup_binary_operands (code, mode, operands);
14933 src1 = operands[1];
14934 src2 = operands[2];
14935
14936 /* Emit the instruction. */
14937
14938 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14939 if (reload_in_progress)
14940 {
14941 /* Reload doesn't know about the flags register, and doesn't know that
14942 it doesn't want to clobber it. We can only do this with PLUS. */
14943 gcc_assert (code == PLUS);
14944 emit_insn (op);
14945 }
14946 else
14947 {
14948 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14949 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14950 }
14951
14952 /* Fix up the destination if needed. */
14953 if (dst != operands[0])
14954 emit_move_insn (operands[0], dst);
14955 }
14956
14957 /* Return TRUE or FALSE depending on whether the binary operator meets the
14958 appropriate constraints. */
14959
14960 bool
14961 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
14962 rtx operands[3])
14963 {
14964 rtx dst = operands[0];
14965 rtx src1 = operands[1];
14966 rtx src2 = operands[2];
14967
14968 /* Both source operands cannot be in memory. */
14969 if (MEM_P (src1) && MEM_P (src2))
14970 return false;
14971
14972 /* Canonicalize operand order for commutative operators. */
14973 if (ix86_swap_binary_operands_p (code, mode, operands))
14974 {
14975 rtx temp = src1;
14976 src1 = src2;
14977 src2 = temp;
14978 }
14979
14980 /* If the destination is memory, we must have a matching source operand. */
14981 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14982 return false;
14983
14984 /* Source 1 cannot be a constant. */
14985 if (CONSTANT_P (src1))
14986 return false;
14987
14988 /* Source 1 cannot be a non-matching memory. */
14989 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14990 return false;
14991
14992 return true;
14993 }
14994
14995 /* Attempt to expand a unary operator. Make the expansion closer to the
14996 actual machine than just general_operand, which will allow 2 separate
14997 memory references (one output, one input) in a single insn. */
14998
14999 void
15000 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15001 rtx operands[])
15002 {
15003 int matching_memory;
15004 rtx src, dst, op, clob;
15005
15006 dst = operands[0];
15007 src = operands[1];
15008
15009 /* If the destination is memory, and we do not have matching source
15010 operands, do things in registers. */
15011 matching_memory = 0;
15012 if (MEM_P (dst))
15013 {
15014 if (rtx_equal_p (dst, src))
15015 matching_memory = 1;
15016 else
15017 dst = gen_reg_rtx (mode);
15018 }
15019
15020 /* When source operand is memory, destination must match. */
15021 if (MEM_P (src) && !matching_memory)
15022 src = force_reg (mode, src);
15023
15024 /* Emit the instruction. */
15025
15026 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15027 if (reload_in_progress || code == NOT)
15028 {
15029 /* Reload doesn't know about the flags register, and doesn't know that
15030 it doesn't want to clobber it. */
15031 gcc_assert (code == NOT);
15032 emit_insn (op);
15033 }
15034 else
15035 {
15036 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15037 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15038 }
15039
15040 /* Fix up the destination if needed. */
15041 if (dst != operands[0])
15042 emit_move_insn (operands[0], dst);
15043 }
15044
15045 /* Split 32-bit/64-bit divmod with 8-bit unsigned divmod if dividend and
15046 divisor are within the range [0-255]. */
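/* A rough sketch of the emitted SImode sequence (register names are
   only illustrative):

       mov     dividend, scratch
       or      divisor, scratch
       test    $-0x100, scratch        ; any bit above bit 7 set?
       je      .Lqimode
       ...full 32-bit divide/modulo...
       jmp     .Ldone
   .Lqimode:
       divb    divisor                 ; AL = quotient, AH = remainder
   .Ldone:  */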
15047
15048 void
15049 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15050 bool signed_p)
15051 {
15052 rtx end_label, qimode_label;
15053 rtx insn, div, mod;
15054 rtx scratch, tmp0, tmp1, tmp2;
15055 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15056 rtx (*gen_zero_extend) (rtx, rtx);
15057 rtx (*gen_test_ccno_1) (rtx, rtx);
15058
15059 switch (mode)
15060 {
15061 case SImode:
15062 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15063 gen_test_ccno_1 = gen_testsi_ccno_1;
15064 gen_zero_extend = gen_zero_extendqisi2;
15065 break;
15066 case DImode:
15067 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15068 gen_test_ccno_1 = gen_testdi_ccno_1;
15069 gen_zero_extend = gen_zero_extendqidi2;
15070 break;
15071 default:
15072 gcc_unreachable ();
15073 }
15074
15075 end_label = gen_label_rtx ();
15076 qimode_label = gen_label_rtx ();
15077
15078 scratch = gen_reg_rtx (mode);
15079
15080 /* Use 8-bit unsigned divmod if dividend and divisor are within
15081 the range [0-255]. */
15082 emit_move_insn (scratch, operands[2]);
15083 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15084 scratch, 1, OPTAB_DIRECT);
15085 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15086 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15087 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15088 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15089 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15090 pc_rtx);
15091 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15092 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15093 JUMP_LABEL (insn) = qimode_label;
15094
15095 /* Generate original signed/unsigned divmod. */
15096 div = gen_divmod4_1 (operands[0], operands[1],
15097 operands[2], operands[3]);
15098 emit_insn (div);
15099
15100 /* Branch to the end. */
15101 emit_jump_insn (gen_jump (end_label));
15102 emit_barrier ();
15103
15104 /* Generate 8bit unsigned divide. */
15105 emit_label (qimode_label);
15106 /* Don't use operands[0] for result of 8bit divide since not all
15107 registers support QImode ZERO_EXTRACT. */
15108 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15109 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15110 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15111 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15112
15113 if (signed_p)
15114 {
15115 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15116 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15117 }
15118 else
15119 {
15120 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15121 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15122 }
15123
15124 /* Extract remainder from AH. */
15125 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15126 if (REG_P (operands[1]))
15127 insn = emit_move_insn (operands[1], tmp1);
15128 else
15129 {
15130 /* Need a new scratch register since the old one has result
15131 of 8bit divide. */
15132 scratch = gen_reg_rtx (mode);
15133 emit_move_insn (scratch, tmp1);
15134 insn = emit_move_insn (operands[1], scratch);
15135 }
15136 set_unique_reg_note (insn, REG_EQUAL, mod);
15137
15138 /* Zero extend quotient from AL. */
15139 tmp1 = gen_lowpart (QImode, tmp0);
15140 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15141 set_unique_reg_note (insn, REG_EQUAL, div);
15142
15143 emit_label (end_label);
15144 }
15145
15146 #define LEA_SEARCH_THRESHOLD 12
15147
15148 /* Search backward for non-agu definition of register number REGNO1
15149 or register number REGNO2 in INSN's basic block until
15150 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15151 2. Reach BB boundary, or
15152 3. Reach agu definition.
15153 Returns the distance between the non-agu definition point and INSN.
15154 If no definition point, returns -1. */
15155
15156 static int
15157 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15158 rtx insn)
15159 {
15160 basic_block bb = BLOCK_FOR_INSN (insn);
15161 int distance = 0;
15162 df_ref *def_rec;
15163 enum attr_type insn_type;
15164
15165 if (insn != BB_HEAD (bb))
15166 {
15167 rtx prev = PREV_INSN (insn);
15168 while (prev && distance < LEA_SEARCH_THRESHOLD)
15169 {
15170 if (NONDEBUG_INSN_P (prev))
15171 {
15172 distance++;
15173 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15174 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15175 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15176 && (regno1 == DF_REF_REGNO (*def_rec)
15177 || regno2 == DF_REF_REGNO (*def_rec)))
15178 {
15179 insn_type = get_attr_type (prev);
15180 if (insn_type != TYPE_LEA)
15181 goto done;
15182 }
15183 }
15184 if (prev == BB_HEAD (bb))
15185 break;
15186 prev = PREV_INSN (prev);
15187 }
15188 }
15189
15190 if (distance < LEA_SEARCH_THRESHOLD)
15191 {
15192 edge e;
15193 edge_iterator ei;
15194 bool simple_loop = false;
15195
15196 FOR_EACH_EDGE (e, ei, bb->preds)
15197 if (e->src == bb)
15198 {
15199 simple_loop = true;
15200 break;
15201 }
15202
15203 if (simple_loop)
15204 {
15205 rtx prev = BB_END (bb);
15206 while (prev
15207 && prev != insn
15208 && distance < LEA_SEARCH_THRESHOLD)
15209 {
15210 if (NONDEBUG_INSN_P (prev))
15211 {
15212 distance++;
15213 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15214 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15215 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15216 && (regno1 == DF_REF_REGNO (*def_rec)
15217 || regno2 == DF_REF_REGNO (*def_rec)))
15218 {
15219 insn_type = get_attr_type (prev);
15220 if (insn_type != TYPE_LEA)
15221 goto done;
15222 }
15223 }
15224 prev = PREV_INSN (prev);
15225 }
15226 }
15227 }
15228
15229 distance = -1;
15230
15231 done:
15232 /* get_attr_type may modify recog data. We want to make sure
15233 that recog data is valid for instruction INSN, on which
15234 distance_non_agu_define is called. INSN is unchanged here. */
15235 extract_insn_cached (insn);
15236 return distance;
15237 }
15238
15239 /* Return the distance between INSN and the next insn that uses
15240 register number REGNO0 in a memory address. Return -1 if no such
15241 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
15242
15243 static int
15244 distance_agu_use (unsigned int regno0, rtx insn)
15245 {
15246 basic_block bb = BLOCK_FOR_INSN (insn);
15247 int distance = 0;
15248 df_ref *def_rec;
15249 df_ref *use_rec;
15250
15251 if (insn != BB_END (bb))
15252 {
15253 rtx next = NEXT_INSN (insn);
15254 while (next && distance < LEA_SEARCH_THRESHOLD)
15255 {
15256 if (NONDEBUG_INSN_P (next))
15257 {
15258 distance++;
15259
15260 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15261 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15262 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15263 && regno0 == DF_REF_REGNO (*use_rec))
15264 {
15265 /* Return DISTANCE if OP0 is used in memory
15266 address in NEXT. */
15267 return distance;
15268 }
15269
15270 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15271 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15272 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15273 && regno0 == DF_REF_REGNO (*def_rec))
15274 {
15275 /* Return -1 if OP0 is set in NEXT. */
15276 return -1;
15277 }
15278 }
15279 if (next == BB_END (bb))
15280 break;
15281 next = NEXT_INSN (next);
15282 }
15283 }
15284
15285 if (distance < LEA_SEARCH_THRESHOLD)
15286 {
15287 edge e;
15288 edge_iterator ei;
15289 bool simple_loop = false;
15290
15291 FOR_EACH_EDGE (e, ei, bb->succs)
15292 if (e->dest == bb)
15293 {
15294 simple_loop = true;
15295 break;
15296 }
15297
15298 if (simple_loop)
15299 {
15300 rtx next = BB_HEAD (bb);
15301 while (next
15302 && next != insn
15303 && distance < LEA_SEARCH_THRESHOLD)
15304 {
15305 if (NONDEBUG_INSN_P (next))
15306 {
15307 distance++;
15308
15309 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15310 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15311 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15312 && regno0 == DF_REF_REGNO (*use_rec))
15313 {
15314 /* Return DISTANCE if OP0 is used in memory
15315 address in NEXT. */
15316 return distance;
15317 }
15318
15319 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15320 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15321 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15322 && regno0 == DF_REF_REGNO (*def_rec))
15323 {
15324 /* Return -1 if OP0 is set in NEXT. */
15325 return -1;
15326 }
15327
15328 }
15329 next = NEXT_INSN (next);
15330 }
15331 }
15332 }
15333
15334 return -1;
15335 }
15336
15337 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
15338 there is a dilemma of choosing LEA or ADD.
15339 Negative value: ADD is preferred over LEA
15340 Zero: Neutral
15341 Positive value: LEA is preferred over ADD. */
15342 #define IX86_LEA_PRIORITY 2
15343
15344 /* Return true if it is ok to optimize an ADD operation to a LEA
15345 operation to avoid flag register consumption. For most processors,
15346 ADD is faster than LEA. For processors like Atom, if the
15347 destination register of LEA holds an actual address which will be
15348 used soon, LEA is better; otherwise ADD is better. */
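/* For example (an illustrative sketch with Atom tuning):

       add     %ebx, %eax              ; regno0 == regno1
       ...
       mov     (%eax), %ecx            ; %eax used in an address soon

   Here distance_agu_use is small and there is no recent non-AGU
   definition of the sources, so emitting "lea (%eax,%ebx), %eax"
   instead keeps the address generation on the AGU.  */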
15349
15350 bool
15351 ix86_lea_for_add_ok (rtx insn, rtx operands[])
15352 {
15353 unsigned int regno0 = true_regnum (operands[0]);
15354 unsigned int regno1 = true_regnum (operands[1]);
15355 unsigned int regno2 = true_regnum (operands[2]);
15356
15357 /* If a = b + c, (a != b && a != c), we must use the lea form. */
15358 if (regno0 != regno1 && regno0 != regno2)
15359 return true;
15360
15361 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15362 return false;
15363 else
15364 {
15365 int dist_define, dist_use;
15366
15367 /* Return false if REGNO0 isn't used in memory address. */
15368 dist_use = distance_agu_use (regno0, insn);
15369 if (dist_use <= 0)
15370 return false;
15371
15372 dist_define = distance_non_agu_define (regno1, regno2, insn);
15373 if (dist_define <= 0)
15374 return true;
15375
15376 /* If this insn has both a backward non-agu dependence and a forward
15377 agu dependence, the one with the shorter distance takes effect. */
15378 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
15379 return false;
15380
15381 return true;
15382 }
15383 }
15384
15385 /* Return true if destination reg of SET_BODY is shift count of
15386 USE_BODY. */
15387
15388 static bool
15389 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15390 {
15391 rtx set_dest;
15392 rtx shift_rtx;
15393 int i;
15394
15395 /* Retrieve destination of SET_BODY. */
15396 switch (GET_CODE (set_body))
15397 {
15398 case SET:
15399 set_dest = SET_DEST (set_body);
15400 if (!set_dest || !REG_P (set_dest))
15401 return false;
15402 break;
15403 case PARALLEL:
15404 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
15405 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
15406 use_body))
15407 return true;
15408 default:
15409 return false;
15410 break;
15411 }
15412
15413 /* Retrieve shift count of USE_BODY. */
15414 switch (GET_CODE (use_body))
15415 {
15416 case SET:
15417 shift_rtx = XEXP (use_body, 1);
15418 break;
15419 case PARALLEL:
15420 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
15421 if (ix86_dep_by_shift_count_body (set_body,
15422 XVECEXP (use_body, 0, i)))
15423 return true;
15424 default:
15425 return false;
15426 break;
15427 }
15428
15429 if (shift_rtx
15430 && (GET_CODE (shift_rtx) == ASHIFT
15431 || GET_CODE (shift_rtx) == LSHIFTRT
15432 || GET_CODE (shift_rtx) == ASHIFTRT
15433 || GET_CODE (shift_rtx) == ROTATE
15434 || GET_CODE (shift_rtx) == ROTATERT))
15435 {
15436 rtx shift_count = XEXP (shift_rtx, 1);
15437
15438 /* Return true if shift count is dest of SET_BODY. */
15439 if (REG_P (shift_count)
15440 && true_regnum (set_dest) == true_regnum (shift_count))
15441 return true;
15442 }
15443
15444 return false;
15445 }
15446
15447 /* Return true if destination reg of SET_INSN is shift count of
15448 USE_INSN. */
15449
15450 bool
15451 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15452 {
15453 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15454 PATTERN (use_insn));
15455 }
15456
15457 /* Return TRUE or FALSE depending on whether the unary operator meets the
15458 appropriate constraints. */
15459
15460 bool
15461 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
15462 enum machine_mode mode ATTRIBUTE_UNUSED,
15463 rtx operands[2] ATTRIBUTE_UNUSED)
15464 {
15465 /* If one of operands is memory, source and destination must match. */
15466 if ((MEM_P (operands[0])
15467 || MEM_P (operands[1]))
15468 && ! rtx_equal_p (operands[0], operands[1]))
15469 return false;
15470 return true;
15471 }
15472
15473 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15474 are ok, keeping in mind the possible movddup alternative. */
15475
15476 bool
15477 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15478 {
15479 if (MEM_P (operands[0]))
15480 return rtx_equal_p (operands[0], operands[1 + high]);
15481 if (MEM_P (operands[1]) && MEM_P (operands[2]))
15482 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
15483 return true;
15484 }
15485
15486 /* Post-reload splitter for converting an SF or DFmode value in an
15487 SSE register into an unsigned SImode. */
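/* In outline, the split computes per element (a sketch of the RTL
   emitted below):

     large         = (value >= 2**31) ? all-ones : 0   ; compare mask
     zero_or_two31 = large & 2**31                      ; 2**31 or 0.0
     value         = value - zero_or_two31              ; now < 2**31
     result        = (int) value ^ (large << 31)        ; restore top bit

   Values of 2**31 or more are thus reduced into the range handled by
   the signed cvtt conversions, and the top bit is put back with XOR.  */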
15488
15489 void
15490 ix86_split_convert_uns_si_sse (rtx operands[])
15491 {
15492 enum machine_mode vecmode;
15493 rtx value, large, zero_or_two31, input, two31, x;
15494
15495 large = operands[1];
15496 zero_or_two31 = operands[2];
15497 input = operands[3];
15498 two31 = operands[4];
15499 vecmode = GET_MODE (large);
15500 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
15501
15502 /* Load up the value into the low element. We must ensure that the other
15503 elements are valid floats -- zero is the easiest such value. */
15504 if (MEM_P (input))
15505 {
15506 if (vecmode == V4SFmode)
15507 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
15508 else
15509 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
15510 }
15511 else
15512 {
15513 input = gen_rtx_REG (vecmode, REGNO (input));
15514 emit_move_insn (value, CONST0_RTX (vecmode));
15515 if (vecmode == V4SFmode)
15516 emit_insn (gen_sse_movss (value, value, input));
15517 else
15518 emit_insn (gen_sse2_movsd (value, value, input));
15519 }
15520
15521 emit_move_insn (large, two31);
15522 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
15523
15524 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
15525 emit_insn (gen_rtx_SET (VOIDmode, large, x));
15526
15527 x = gen_rtx_AND (vecmode, zero_or_two31, large);
15528 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
15529
15530 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
15531 emit_insn (gen_rtx_SET (VOIDmode, value, x));
15532
15533 large = gen_rtx_REG (V4SImode, REGNO (large));
15534 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
15535
15536 x = gen_rtx_REG (V4SImode, REGNO (value));
15537 if (vecmode == V4SFmode)
15538 emit_insn (gen_sse2_cvttps2dq (x, value));
15539 else
15540 emit_insn (gen_sse2_cvttpd2dq (x, value));
15541 value = x;
15542
15543 emit_insn (gen_xorv4si3 (value, value, large));
15544 }
15545
15546 /* Convert an unsigned DImode value into a DFmode, using only SSE.
15547 Expects the 64-bit DImode to be supplied in a pair of integral
15548 registers. Requires SSE2; will use SSE3 if available. For x86_32,
15549 -mfpmath=sse, !optimize_size only. */
15550
15551 void
15552 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
15553 {
15554 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
15555 rtx int_xmm, fp_xmm;
15556 rtx biases, exponents;
15557 rtx x;
15558
15559 int_xmm = gen_reg_rtx (V4SImode);
15560 if (TARGET_INTER_UNIT_MOVES)
15561 emit_insn (gen_movdi_to_sse (int_xmm, input));
15562 else if (TARGET_SSE_SPLIT_REGS)
15563 {
15564 emit_clobber (int_xmm);
15565 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
15566 }
15567 else
15568 {
15569 x = gen_reg_rtx (V2DImode);
15570 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
15571 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
15572 }
15573
15574 x = gen_rtx_CONST_VECTOR (V4SImode,
15575 gen_rtvec (4, GEN_INT (0x43300000UL),
15576 GEN_INT (0x45300000UL),
15577 const0_rtx, const0_rtx));
15578 exponents = validize_mem (force_const_mem (V4SImode, x));
15579
15580 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
15581 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
15582
15583 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
15584 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
15585 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
15586 (0x1.0p84 + double(fp_value_hi_xmm)).
15587 Note these exponents differ by 32. */
15588
15589 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
15590
15591 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
15592 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
15593 real_ldexp (&bias_lo_rvt, &dconst1, 52);
15594 real_ldexp (&bias_hi_rvt, &dconst1, 84);
15595 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
15596 x = const_double_from_real_value (bias_hi_rvt, DFmode);
15597 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
15598 biases = validize_mem (force_const_mem (V2DFmode, biases));
15599 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
15600
15601 /* Add the upper and lower DFmode values together. */
15602 if (TARGET_SSE3)
15603 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
15604 else
15605 {
15606 x = copy_to_mode_reg (V2DFmode, fp_xmm);
15607 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
15608 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
15609 }
15610
15611 ix86_expand_vector_extract (false, target, fp_xmm, 0);
15612 }
15613
15614 /* Not used, but eases macroization of patterns. */
15615 void
15616 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
15617 rtx input ATTRIBUTE_UNUSED)
15618 {
15619 gcc_unreachable ();
15620 }
15621
15622 /* Convert an unsigned SImode value into a DFmode. Only currently used
15623 for SSE, but applicable anywhere. */
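/* The idea: bias the input by -2**31 so it becomes a valid signed
   SImode value, convert it with the ordinary signed conversion, and
   then add 2**31.0 back in DFmode, where the addition is exact.  */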
15624
15625 void
15626 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
15627 {
15628 REAL_VALUE_TYPE TWO31r;
15629 rtx x, fp;
15630
15631 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
15632 NULL, 1, OPTAB_DIRECT);
15633
15634 fp = gen_reg_rtx (DFmode);
15635 emit_insn (gen_floatsidf2 (fp, x));
15636
15637 real_ldexp (&TWO31r, &dconst1, 31);
15638 x = const_double_from_real_value (TWO31r, DFmode);
15639
15640 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
15641 if (x != target)
15642 emit_move_insn (target, x);
15643 }
15644
15645 /* Convert a signed DImode value into a DFmode. Only used for SSE in
15646 32-bit mode; otherwise we have a direct convert instruction. */
15647
15648 void
15649 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
15650 {
15651 REAL_VALUE_TYPE TWO32r;
15652 rtx fp_lo, fp_hi, x;
15653
15654 fp_lo = gen_reg_rtx (DFmode);
15655 fp_hi = gen_reg_rtx (DFmode);
15656
15657 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
15658
15659 real_ldexp (&TWO32r, &dconst1, 32);
15660 x = const_double_from_real_value (TWO32r, DFmode);
15661 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
15662
15663 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
15664
15665 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
15666 0, OPTAB_DIRECT);
15667 if (x != target)
15668 emit_move_insn (target, x);
15669 }
15670
15671 /* Convert an unsigned SImode value into a SFmode, using only SSE.
15672 For x86_32, -mfpmath=sse, !optimize_size only. */
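/* The input is split as input = hi * 2**16 + lo with hi and lo in
   [0, 0xffff]; each half converts exactly to SFmode, scaling fp_hi
   by 2**16 is exact, and only the final addition rounds.  */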
15673 void
15674 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
15675 {
15676 REAL_VALUE_TYPE ONE16r;
15677 rtx fp_hi, fp_lo, int_hi, int_lo, x;
15678
15679 real_ldexp (&ONE16r, &dconst1, 16);
15680 x = const_double_from_real_value (ONE16r, SFmode);
15681 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
15682 NULL, 0, OPTAB_DIRECT);
15683 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
15684 NULL, 0, OPTAB_DIRECT);
15685 fp_hi = gen_reg_rtx (SFmode);
15686 fp_lo = gen_reg_rtx (SFmode);
15687 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
15688 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
15689 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
15690 0, OPTAB_DIRECT);
15691 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
15692 0, OPTAB_DIRECT);
15693 if (!rtx_equal_p (target, fp_hi))
15694 emit_move_insn (target, fp_hi);
15695 }
15696
15697 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15698 then replicate the value for all elements of the vector
15699 register. */
15700
15701 rtx
15702 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
15703 {
15704 rtvec v;
15705 switch (mode)
15706 {
15707 case SImode:
15708 gcc_assert (vect);
15709 v = gen_rtvec (4, value, value, value, value);
15710 return gen_rtx_CONST_VECTOR (V4SImode, v);
15711
15712 case DImode:
15713 gcc_assert (vect);
15714 v = gen_rtvec (2, value, value);
15715 return gen_rtx_CONST_VECTOR (V2DImode, v);
15716
15717 case SFmode:
15718 if (vect)
15719 v = gen_rtvec (4, value, value, value, value);
15720 else
15721 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
15722 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15723 return gen_rtx_CONST_VECTOR (V4SFmode, v);
15724
15725 case DFmode:
15726 if (vect)
15727 v = gen_rtvec (2, value, value);
15728 else
15729 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
15730 return gen_rtx_CONST_VECTOR (V2DFmode, v);
15731
15732 default:
15733 gcc_unreachable ();
15734 }
15735 }
15736
15737 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15738 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15739 for an SSE register. If VECT is true, then replicate the mask for
15740 all elements of the vector register. If INVERT is true, then create
15741 a mask excluding the sign bit. */
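/* For example, for DFmode with VECT set and INVERT clear this builds
   the vector constant { -0.0, -0.0 }, i.e. only bit 63 set in each
   element, and returns it forced into a register; with INVERT set,
   all bits except the sign bit are set instead.  */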
15742
15743 rtx
15744 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15745 {
15746 enum machine_mode vec_mode, imode;
15747 HOST_WIDE_INT hi, lo;
15748 int shift = 63;
15749 rtx v;
15750 rtx mask;
15751
15752 /* Find the sign bit, sign extended to 2*HWI. */
15753 switch (mode)
15754 {
15755 case SImode:
15756 case SFmode:
15757 imode = SImode;
15758 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
15759 lo = 0x80000000, hi = lo < 0;
15760 break;
15761
15762 case DImode:
15763 case DFmode:
15764 imode = DImode;
15765 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
15766 if (HOST_BITS_PER_WIDE_INT >= 64)
15767 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15768 else
15769 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15770 break;
15771
15772 case TImode:
15773 case TFmode:
15774 vec_mode = VOIDmode;
15775 if (HOST_BITS_PER_WIDE_INT >= 64)
15776 {
15777 imode = TImode;
15778 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15779 }
15780 else
15781 {
15782 rtvec vec;
15783
15784 imode = DImode;
15785 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15786
15787 if (invert)
15788 {
15789 lo = ~lo, hi = ~hi;
15790 v = constm1_rtx;
15791 }
15792 else
15793 v = const0_rtx;
15794
15795 mask = immed_double_const (lo, hi, imode);
15796
15797 vec = gen_rtvec (2, v, mask);
15798 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15799 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15800
15801 return v;
15802 }
15803 break;
15804
15805 default:
15806 gcc_unreachable ();
15807 }
15808
15809 if (invert)
15810 lo = ~lo, hi = ~hi;
15811
15812 /* Force this value into the low part of a fp vector constant. */
15813 mask = immed_double_const (lo, hi, imode);
15814 mask = gen_lowpart (mode, mask);
15815
15816 if (vec_mode == VOIDmode)
15817 return force_reg (mode, mask);
15818
15819 v = ix86_build_const_vector (mode, vect, mask);
15820 return force_reg (vec_mode, v);
15821 }
15822
15823 /* Generate code for floating point ABS or NEG. */
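/* Roughly: with SSE math a DFmode negation is emitted as an XORPD
   with a { -0.0, -0.0 } mask and fabs as an ANDPD with the inverted
   mask; the x87 path simply uses fchs/fabs.  */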
15824
15825 void
15826 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15827 rtx operands[])
15828 {
15829 rtx mask, set, use, clob, dst, src;
15830 bool use_sse = false;
15831 bool vector_mode = VECTOR_MODE_P (mode);
15832 enum machine_mode elt_mode = mode;
15833
15834 if (vector_mode)
15835 {
15836 elt_mode = GET_MODE_INNER (mode);
15837 use_sse = true;
15838 }
15839 else if (mode == TFmode)
15840 use_sse = true;
15841 else if (TARGET_SSE_MATH)
15842 use_sse = SSE_FLOAT_MODE_P (mode);
15843
15844 /* NEG and ABS performed with SSE use bitwise mask operations.
15845 Create the appropriate mask now. */
15846 if (use_sse)
15847 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
15848 else
15849 mask = NULL_RTX;
15850
15851 dst = operands[0];
15852 src = operands[1];
15853
15854 if (vector_mode)
15855 {
15856 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15857 set = gen_rtx_SET (VOIDmode, dst, set);
15858 emit_insn (set);
15859 }
15860 else
15861 {
15862 set = gen_rtx_fmt_e (code, mode, src);
15863 set = gen_rtx_SET (VOIDmode, dst, set);
15864 if (mask)
15865 {
15866 use = gen_rtx_USE (VOIDmode, mask);
15867 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15868 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15869 gen_rtvec (3, set, use, clob)));
15870 }
15871 else
15872 emit_insn (set);
15873 }
15874 }
15875
15876 /* Expand a copysign operation. Special case operand 0 being a constant. */
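/* In the variable case this boils down to the usual bit selection
     dest = (op0 & ~signbit_mask) | (op1 & signbit_mask)
   carried out with and/andn/or vector operations, so only the sign
   bit of op1 reaches the result.  */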
15877
15878 void
15879 ix86_expand_copysign (rtx operands[])
15880 {
15881 enum machine_mode mode;
15882 rtx dest, op0, op1, mask, nmask;
15883
15884 dest = operands[0];
15885 op0 = operands[1];
15886 op1 = operands[2];
15887
15888 mode = GET_MODE (dest);
15889
15890 if (GET_CODE (op0) == CONST_DOUBLE)
15891 {
15892 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15893
15894 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15895 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15896
15897 if (mode == SFmode || mode == DFmode)
15898 {
15899 enum machine_mode vmode;
15900
15901 vmode = mode == SFmode ? V4SFmode : V2DFmode;
15902
15903 if (op0 == CONST0_RTX (mode))
15904 op0 = CONST0_RTX (vmode);
15905 else
15906 {
15907 rtx v = ix86_build_const_vector (mode, false, op0);
15908
15909 op0 = force_reg (vmode, v);
15910 }
15911 }
15912 else if (op0 != CONST0_RTX (mode))
15913 op0 = force_reg (mode, op0);
15914
15915 mask = ix86_build_signbit_mask (mode, 0, 0);
15916
15917 if (mode == SFmode)
15918 copysign_insn = gen_copysignsf3_const;
15919 else if (mode == DFmode)
15920 copysign_insn = gen_copysigndf3_const;
15921 else
15922 copysign_insn = gen_copysigntf3_const;
15923
15924 emit_insn (copysign_insn (dest, op0, op1, mask));
15925 }
15926 else
15927 {
15928 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
15929
15930 nmask = ix86_build_signbit_mask (mode, 0, 1);
15931 mask = ix86_build_signbit_mask (mode, 0, 0);
15932
15933 if (mode == SFmode)
15934 copysign_insn = gen_copysignsf3_var;
15935 else if (mode == DFmode)
15936 copysign_insn = gen_copysigndf3_var;
15937 else
15938 copysign_insn = gen_copysigntf3_var;
15939
15940 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
15941 }
15942 }
15943
15944 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
15945 be a constant, and so has already been expanded into a vector constant. */
15946
15947 void
15948 ix86_split_copysign_const (rtx operands[])
15949 {
15950 enum machine_mode mode, vmode;
15951 rtx dest, op0, mask, x;
15952
15953 dest = operands[0];
15954 op0 = operands[1];
15955 mask = operands[3];
15956
15957 mode = GET_MODE (dest);
15958 vmode = GET_MODE (mask);
15959
15960 dest = simplify_gen_subreg (vmode, dest, mode, 0);
15961 x = gen_rtx_AND (vmode, dest, mask);
15962 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15963
15964 if (op0 != CONST0_RTX (vmode))
15965 {
15966 x = gen_rtx_IOR (vmode, dest, op0);
15967 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15968 }
15969 }
15970
15971 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
15972 so we have to do two masks. */
15973
15974 void
15975 ix86_split_copysign_var (rtx operands[])
15976 {
15977 enum machine_mode mode, vmode;
15978 rtx dest, scratch, op0, op1, mask, nmask, x;
15979
15980 dest = operands[0];
15981 scratch = operands[1];
15982 op0 = operands[2];
15983 op1 = operands[3];
15984 nmask = operands[4];
15985 mask = operands[5];
15986
15987 mode = GET_MODE (dest);
15988 vmode = GET_MODE (mask);
15989
15990 if (rtx_equal_p (op0, op1))
15991 {
15992 /* Shouldn't happen often (it's useless, obviously), but when it does
15993 we'd generate incorrect code if we continue below. */
15994 emit_move_insn (dest, op0);
15995 return;
15996 }
15997
15998 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
15999 {
16000 gcc_assert (REGNO (op1) == REGNO (scratch));
16001
16002 x = gen_rtx_AND (vmode, scratch, mask);
16003 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16004
16005 dest = mask;
16006 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16007 x = gen_rtx_NOT (vmode, dest);
16008 x = gen_rtx_AND (vmode, x, op0);
16009 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16010 }
16011 else
16012 {
16013 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16014 {
16015 x = gen_rtx_AND (vmode, scratch, mask);
16016 }
16017 else /* alternative 2,4 */
16018 {
16019 gcc_assert (REGNO (mask) == REGNO (scratch));
16020 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16021 x = gen_rtx_AND (vmode, scratch, op1);
16022 }
16023 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16024
16025 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16026 {
16027 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16028 x = gen_rtx_AND (vmode, dest, nmask);
16029 }
16030 else /* alternative 3,4 */
16031 {
16032 gcc_assert (REGNO (nmask) == REGNO (dest));
16033 dest = nmask;
16034 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16035 x = gen_rtx_AND (vmode, dest, op0);
16036 }
16037 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16038 }
16039
16040 x = gen_rtx_IOR (vmode, dest, scratch);
16041 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16042 }
16043
16044 /* Return TRUE or FALSE depending on whether the first SET in INSN
16045 has source and destination with matching CC modes, and that the
16046 CC mode is at least as constrained as REQ_MODE. */
16047
16048 bool
16049 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16050 {
16051 rtx set;
16052 enum machine_mode set_mode;
16053
16054 set = PATTERN (insn);
16055 if (GET_CODE (set) == PARALLEL)
16056 set = XVECEXP (set, 0, 0);
16057 gcc_assert (GET_CODE (set) == SET);
16058 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16059
16060 set_mode = GET_MODE (SET_DEST (set));
16061 switch (set_mode)
16062 {
16063 case CCNOmode:
16064 if (req_mode != CCNOmode
16065 && (req_mode != CCmode
16066 || XEXP (SET_SRC (set), 1) != const0_rtx))
16067 return false;
16068 break;
16069 case CCmode:
16070 if (req_mode == CCGCmode)
16071 return false;
16072 /* FALLTHRU */
16073 case CCGCmode:
16074 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16075 return false;
16076 /* FALLTHRU */
16077 case CCGOCmode:
16078 if (req_mode == CCZmode)
16079 return false;
16080 /* FALLTHRU */
16081 case CCAmode:
16082 case CCCmode:
16083 case CCOmode:
16084 case CCSmode:
16085 case CCZmode:
16086 break;
16087
16088 default:
16089 gcc_unreachable ();
16090 }
16091
16092 return GET_MODE (SET_SRC (set)) == set_mode;
16093 }
16094
16095 /* Generate insn patterns to do an integer compare of OPERANDS. */
16096
16097 static rtx
16098 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16099 {
16100 enum machine_mode cmpmode;
16101 rtx tmp, flags;
16102
16103 cmpmode = SELECT_CC_MODE (code, op0, op1);
16104 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16105
16106 /* This is very simple, but making the interface the same as in the
16107 FP case makes the rest of the code easier. */
16108 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16109 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16110
16111 /* Return the test that should be put into the flags user, i.e.
16112 the bcc, scc, or cmov instruction. */
16113 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16114 }
16115
16116 /* Figure out whether to use ordered or unordered fp comparisons.
16117 Return the appropriate mode to use. */
16118
16119 enum machine_mode
16120 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16121 {
16122 /* ??? In order to make all comparisons reversible, we do all comparisons
16123 non-trapping when compiling for IEEE. Once gcc is able to distinguish
16124 the trapping and nontrapping forms of all comparisons, we can make inequality
16125 comparisons trapping again, since it results in better code when using
16126 FCOM based compares. */
16127 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16128 }
16129
16130 enum machine_mode
16131 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16132 {
16133 enum machine_mode mode = GET_MODE (op0);
16134
16135 if (SCALAR_FLOAT_MODE_P (mode))
16136 {
16137 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16138 return ix86_fp_compare_mode (code);
16139 }
16140
16141 switch (code)
16142 {
16143 /* Only zero flag is needed. */
16144 case EQ: /* ZF=0 */
16145 case NE: /* ZF!=0 */
16146 return CCZmode;
16147 /* Codes needing carry flag. */
16148 case GEU: /* CF=0 */
16149 case LTU: /* CF=1 */
16150 /* Detect overflow checks. They need just the carry flag. */
16151 if (GET_CODE (op0) == PLUS
16152 && rtx_equal_p (op1, XEXP (op0, 0)))
16153 return CCCmode;
16154 else
16155 return CCmode;
16156 case GTU: /* CF=0 & ZF=0 */
16157 case LEU: /* CF=1 | ZF=1 */
16158 /* Detect overflow checks. They need just the carry flag. */
16159 if (GET_CODE (op0) == MINUS
16160 && rtx_equal_p (op1, XEXP (op0, 0)))
16161 return CCCmode;
16162 else
16163 return CCmode;
16164 /* Codes possibly doable only with sign flag when
16165 comparing against zero. */
16166 case GE: /* SF=OF or SF=0 */
16167 case LT: /* SF<>OF or SF=1 */
16168 if (op1 == const0_rtx)
16169 return CCGOCmode;
16170 else
16171 /* For other cases the Carry flag is not required. */
16172 return CCGCmode;
16173 /* Codes doable only with the sign flag when comparing
16174 against zero, but we miss the jump instruction for it,
16175 so we need to use relational tests against the overflow
16176 flag, which thus needs to be zero. */
16177 case GT: /* ZF=0 & SF=OF */
16178 case LE: /* ZF=1 | SF<>OF */
16179 if (op1 == const0_rtx)
16180 return CCNOmode;
16181 else
16182 return CCGCmode;
16183 /* The strcmp pattern does (use flags), and combine may ask us for the
16184 proper mode. */
16185 case USE:
16186 return CCmode;
16187 default:
16188 gcc_unreachable ();
16189 }
16190 }
16191
16192 /* Return the fixed registers used for condition codes. */
16193
16194 static bool
16195 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16196 {
16197 *p1 = FLAGS_REG;
16198 *p2 = FPSR_REG;
16199 return true;
16200 }
16201
16202 /* If two condition code modes are compatible, return a condition code
16203 mode which is compatible with both. Otherwise, return
16204 VOIDmode. */
16205
16206 static enum machine_mode
16207 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
16208 {
16209 if (m1 == m2)
16210 return m1;
16211
16212 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16213 return VOIDmode;
16214
16215 if ((m1 == CCGCmode && m2 == CCGOCmode)
16216 || (m1 == CCGOCmode && m2 == CCGCmode))
16217 return CCGCmode;
16218
16219 switch (m1)
16220 {
16221 default:
16222 gcc_unreachable ();
16223
16224 case CCmode:
16225 case CCGCmode:
16226 case CCGOCmode:
16227 case CCNOmode:
16228 case CCAmode:
16229 case CCCmode:
16230 case CCOmode:
16231 case CCSmode:
16232 case CCZmode:
16233 switch (m2)
16234 {
16235 default:
16236 return VOIDmode;
16237
16238 case CCmode:
16239 case CCGCmode:
16240 case CCGOCmode:
16241 case CCNOmode:
16242 case CCAmode:
16243 case CCCmode:
16244 case CCOmode:
16245 case CCSmode:
16246 case CCZmode:
16247 return CCmode;
16248 }
16249
16250 case CCFPmode:
16251 case CCFPUmode:
16252 /* These are only compatible with themselves, which we already
16253 checked above. */
16254 return VOIDmode;
16255 }
16256 }
16257
16258
16259 /* Return a comparison we can do that is equivalent to
16260 swap_condition (code), apart possibly from orderedness.
16261 But never change orderedness if TARGET_IEEE_FP, returning
16262 UNKNOWN in that case if necessary. */
16263
16264 static enum rtx_code
16265 ix86_fp_swap_condition (enum rtx_code code)
16266 {
16267 switch (code)
16268 {
16269 case GT: /* GTU - CF=0 & ZF=0 */
16270 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
16271 case GE: /* GEU - CF=0 */
16272 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
16273 case UNLT: /* LTU - CF=1 */
16274 return TARGET_IEEE_FP ? UNKNOWN : GT;
16275 case UNLE: /* LEU - CF=1 | ZF=1 */
16276 return TARGET_IEEE_FP ? UNKNOWN : GE;
16277 default:
16278 return swap_condition (code);
16279 }
16280 }
16281
16282 /* Return the cost of comparison CODE using the best strategy for performance.
16283 All following functions use the number of instructions as the cost metric.
16284 In the future this should be tweaked to compute bytes for optimize_size and
16285 take into account the performance of various instructions on various CPUs. */
16286
16287 static int
16288 ix86_fp_comparison_cost (enum rtx_code code)
16289 {
16290 int arith_cost;
16291
16292 /* The cost of code using bit-twiddling on %ah. */
16293 switch (code)
16294 {
16295 case UNLE:
16296 case UNLT:
16297 case LTGT:
16298 case GT:
16299 case GE:
16300 case UNORDERED:
16301 case ORDERED:
16302 case UNEQ:
16303 arith_cost = 4;
16304 break;
16305 case LT:
16306 case NE:
16307 case EQ:
16308 case UNGE:
16309 arith_cost = TARGET_IEEE_FP ? 5 : 4;
16310 break;
16311 case LE:
16312 case UNGT:
16313 arith_cost = TARGET_IEEE_FP ? 6 : 4;
16314 break;
16315 default:
16316 gcc_unreachable ();
16317 }
16318
16319 switch (ix86_fp_comparison_strategy (code))
16320 {
16321 case IX86_FPCMP_COMI:
16322 return arith_cost > 4 ? 3 : 2;
16323 case IX86_FPCMP_SAHF:
16324 return arith_cost > 4 ? 4 : 3;
16325 default:
16326 return arith_cost;
16327 }
16328 }
16329
16330 /* Return the strategy to use for a floating-point comparison. We assume
16331 that fcomi is always preferable where available, since that is also true
16332 for size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16333
16334 enum ix86_fpcmp_strategy
16335 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
16336 {
16337 /* Do fcomi/sahf based test when profitable. */
16338
16339 if (TARGET_CMOVE)
16340 return IX86_FPCMP_COMI;
16341
16342 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
16343 return IX86_FPCMP_SAHF;
16344
16345 return IX86_FPCMP_ARITH;
16346 }
16347
16348 /* Swap, force into registers, or otherwise massage the two operands
16349 to a fp comparison. The operands are updated in place; the new
16350 comparison code is returned. */
16351
16352 static enum rtx_code
16353 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
16354 {
16355 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
16356 rtx op0 = *pop0, op1 = *pop1;
16357 enum machine_mode op_mode = GET_MODE (op0);
16358 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
16359
16360 /* All of the unordered compare instructions only work on registers.
16361 The same is true of the fcomi compare instructions. The XFmode
16362 compare instructions require registers except when comparing
16363 against zero or when converting operand 1 from fixed point to
16364 floating point. */
16365
16366 if (!is_sse
16367 && (fpcmp_mode == CCFPUmode
16368 || (op_mode == XFmode
16369 && ! (standard_80387_constant_p (op0) == 1
16370 || standard_80387_constant_p (op1) == 1)
16371 && GET_CODE (op1) != FLOAT)
16372 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
16373 {
16374 op0 = force_reg (op_mode, op0);
16375 op1 = force_reg (op_mode, op1);
16376 }
16377 else
16378 {
16379 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
16380 things around if they appear profitable, otherwise force op0
16381 into a register. */
16382
16383 if (standard_80387_constant_p (op0) == 0
16384 || (MEM_P (op0)
16385 && ! (standard_80387_constant_p (op1) == 0
16386 || MEM_P (op1))))
16387 {
16388 enum rtx_code new_code = ix86_fp_swap_condition (code);
16389 if (new_code != UNKNOWN)
16390 {
16391 rtx tmp;
16392 tmp = op0, op0 = op1, op1 = tmp;
16393 code = new_code;
16394 }
16395 }
16396
16397 if (!REG_P (op0))
16398 op0 = force_reg (op_mode, op0);
16399
16400 if (CONSTANT_P (op1))
16401 {
16402 int tmp = standard_80387_constant_p (op1);
16403 if (tmp == 0)
16404 op1 = validize_mem (force_const_mem (op_mode, op1));
16405 else if (tmp == 1)
16406 {
16407 if (TARGET_CMOVE)
16408 op1 = force_reg (op_mode, op1);
16409 }
16410 else
16411 op1 = force_reg (op_mode, op1);
16412 }
16413 }
16414
16415 /* Try to rearrange the comparison to make it cheaper. */
16416 if (ix86_fp_comparison_cost (code)
16417 > ix86_fp_comparison_cost (swap_condition (code))
16418 && (REG_P (op1) || can_create_pseudo_p ()))
16419 {
16420 rtx tmp;
16421 tmp = op0, op0 = op1, op1 = tmp;
16422 code = swap_condition (code);
16423 if (!REG_P (op0))
16424 op0 = force_reg (op_mode, op0);
16425 }
16426
16427 *pop0 = op0;
16428 *pop1 = op1;
16429 return code;
16430 }
16431
16432 /* Convert a comparison code we use to represent an FP comparison to the
16433 integer code that will result in a proper branch. Return UNKNOWN if no
16434 such code is available. */
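/* After fcomi or fnstsw+sahf the FP condition ends up in CF/ZF the same
   way an unsigned integer comparison would set them, hence GT maps to GTU,
   GE to GEU, and the UN* codes to their unsigned counterparts.  */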
16435
16436 enum rtx_code
16437 ix86_fp_compare_code_to_integer (enum rtx_code code)
16438 {
16439 switch (code)
16440 {
16441 case GT:
16442 return GTU;
16443 case GE:
16444 return GEU;
16445 case ORDERED:
16446 case UNORDERED:
16447 return code;
16448 break;
16449 case UNEQ:
16450 return EQ;
16451 break;
16452 case UNLT:
16453 return LTU;
16454 break;
16455 case UNLE:
16456 return LEU;
16457 break;
16458 case LTGT:
16459 return NE;
16460 break;
16461 default:
16462 return UNKNOWN;
16463 }
16464 }
16465
16466 /* Generate insn patterns to do a floating point compare of OPERANDS. */
16467
16468 static rtx
16469 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
16470 {
16471 enum machine_mode fpcmp_mode, intcmp_mode;
16472 rtx tmp, tmp2;
16473
16474 fpcmp_mode = ix86_fp_compare_mode (code);
16475 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
16476
16477 /* Do fcomi/sahf based test when profitable. */
16478 switch (ix86_fp_comparison_strategy (code))
16479 {
16480 case IX86_FPCMP_COMI:
16481 intcmp_mode = fpcmp_mode;
16482 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16483 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16484 tmp);
16485 emit_insn (tmp);
16486 break;
16487
16488 case IX86_FPCMP_SAHF:
16489 intcmp_mode = fpcmp_mode;
16490 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16491 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16492 tmp);
16493
16494 if (!scratch)
16495 scratch = gen_reg_rtx (HImode);
16496 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
16497 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
16498 break;
16499
16500 case IX86_FPCMP_ARITH:
16501 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
16502 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16503 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
16504 if (!scratch)
16505 scratch = gen_reg_rtx (HImode);
16506 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
16507
16508 /* In the unordered case, we have to check C2 for NaN's, which
16509 doesn't happen to work out to anything nice combination-wise.
16510 So do some bit twiddling on the value we've got in AH to come
16511 up with an appropriate set of condition codes. */
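/* In the fnstsw result, bit 0x01 of %ah is C0 (carry), 0x04 is C2
   (unordered) and 0x40 is C3 (zero); the 0x45, 0x44, 0x05, 0x04 and 0x40
   masks used below select combinations of those bits.  */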
16512
16513 intcmp_mode = CCNOmode;
16514 switch (code)
16515 {
16516 case GT:
16517 case UNGT:
16518 if (code == GT || !TARGET_IEEE_FP)
16519 {
16520 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16521 code = EQ;
16522 }
16523 else
16524 {
16525 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16526 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16527 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
16528 intcmp_mode = CCmode;
16529 code = GEU;
16530 }
16531 break;
16532 case LT:
16533 case UNLT:
16534 if (code == LT && TARGET_IEEE_FP)
16535 {
16536 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16537 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
16538 intcmp_mode = CCmode;
16539 code = EQ;
16540 }
16541 else
16542 {
16543 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
16544 code = NE;
16545 }
16546 break;
16547 case GE:
16548 case UNGE:
16549 if (code == GE || !TARGET_IEEE_FP)
16550 {
16551 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
16552 code = EQ;
16553 }
16554 else
16555 {
16556 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16557 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
16558 code = NE;
16559 }
16560 break;
16561 case LE:
16562 case UNLE:
16563 if (code == LE && TARGET_IEEE_FP)
16564 {
16565 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16566 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16567 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16568 intcmp_mode = CCmode;
16569 code = LTU;
16570 }
16571 else
16572 {
16573 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16574 code = NE;
16575 }
16576 break;
16577 case EQ:
16578 case UNEQ:
16579 if (code == EQ && TARGET_IEEE_FP)
16580 {
16581 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16582 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16583 intcmp_mode = CCmode;
16584 code = EQ;
16585 }
16586 else
16587 {
16588 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16589 code = NE;
16590 }
16591 break;
16592 case NE:
16593 case LTGT:
16594 if (code == NE && TARGET_IEEE_FP)
16595 {
16596 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16597 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
16598 GEN_INT (0x40)));
16599 code = NE;
16600 }
16601 else
16602 {
16603 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16604 code = EQ;
16605 }
16606 break;
16607
16608 case UNORDERED:
16609 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16610 code = NE;
16611 break;
16612 case ORDERED:
16613 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16614 code = EQ;
16615 break;
16616
16617 default:
16618 gcc_unreachable ();
16619 }
16620 break;
16621
16622 default:
16623 gcc_unreachable();
16624 }
16625
16626 /* Return the test that should be put into the flags user, i.e.
16627 the bcc, scc, or cmov instruction. */
16628 return gen_rtx_fmt_ee (code, VOIDmode,
16629 gen_rtx_REG (intcmp_mode, FLAGS_REG),
16630 const0_rtx);
16631 }
16632
16633 static rtx
16634 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
16635 {
16636 rtx ret;
16637
16638 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
16639 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
16640
16641 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
16642 {
16643 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
16644 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16645 }
16646 else
16647 ret = ix86_expand_int_compare (code, op0, op1);
16648
16649 return ret;
16650 }
16651
16652 void
16653 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
16654 {
16655 enum machine_mode mode = GET_MODE (op0);
16656 rtx tmp;
16657
16658 switch (mode)
16659 {
16660 case SFmode:
16661 case DFmode:
16662 case XFmode:
16663 case QImode:
16664 case HImode:
16665 case SImode:
16666 simple:
16667 tmp = ix86_expand_compare (code, op0, op1);
16668 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16669 gen_rtx_LABEL_REF (VOIDmode, label),
16670 pc_rtx);
16671 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16672 return;
16673
16674 case DImode:
16675 if (TARGET_64BIT)
16676 goto simple;
16677 case TImode:
16678 /* Expand a DImode/TImode branch into multiple compare+branch pairs. */
16679 {
16680 rtx lo[2], hi[2], label2;
16681 enum rtx_code code1, code2, code3;
16682 enum machine_mode submode;
16683
16684 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
16685 {
16686 tmp = op0, op0 = op1, op1 = tmp;
16687 code = swap_condition (code);
16688 }
16689
16690 split_double_mode (mode, &op0, 1, lo+0, hi+0);
16691 split_double_mode (mode, &op1, 1, lo+1, hi+1);
16692
16693 submode = mode == DImode ? SImode : DImode;
16694
16695 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
16696 avoid two branches. This costs one extra insn, so disable when
16697 optimizing for size. */
16698
16699 if ((code == EQ || code == NE)
16700 && (!optimize_insn_for_size_p ()
16701 || hi[1] == const0_rtx || lo[1] == const0_rtx))
16702 {
16703 rtx xor0, xor1;
16704
16705 xor1 = hi[0];
16706 if (hi[1] != const0_rtx)
16707 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
16708 NULL_RTX, 0, OPTAB_WIDEN);
16709
16710 xor0 = lo[0];
16711 if (lo[1] != const0_rtx)
16712 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
16713 NULL_RTX, 0, OPTAB_WIDEN);
16714
16715 tmp = expand_binop (submode, ior_optab, xor1, xor0,
16716 NULL_RTX, 0, OPTAB_WIDEN);
16717
16718 ix86_expand_branch (code, tmp, const0_rtx, label);
16719 return;
16720 }
16721
16722 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
16723 op1 is a constant and the low word is zero, then we can just
16724 examine the high word. Similarly for a low word of -1 and
16725 less-or-equal or greater-than comparisons. */
16726
16727 if (CONST_INT_P (hi[1]))
16728 switch (code)
16729 {
16730 case LT: case LTU: case GE: case GEU:
16731 if (lo[1] == const0_rtx)
16732 {
16733 ix86_expand_branch (code, hi[0], hi[1], label);
16734 return;
16735 }
16736 break;
16737 case LE: case LEU: case GT: case GTU:
16738 if (lo[1] == constm1_rtx)
16739 {
16740 ix86_expand_branch (code, hi[0], hi[1], label);
16741 return;
16742 }
16743 break;
16744 default:
16745 break;
16746 }
16747
16748 /* Otherwise, we need two or three jumps. */
16749
16750 label2 = gen_label_rtx ();
16751
16752 code1 = code;
16753 code2 = swap_condition (code);
16754 code3 = unsigned_condition (code);
16755
16756 switch (code)
16757 {
16758 case LT: case GT: case LTU: case GTU:
16759 break;
16760
16761 case LE: code1 = LT; code2 = GT; break;
16762 case GE: code1 = GT; code2 = LT; break;
16763 case LEU: code1 = LTU; code2 = GTU; break;
16764 case GEU: code1 = GTU; code2 = LTU; break;
16765
16766 case EQ: code1 = UNKNOWN; code2 = NE; break;
16767 case NE: code2 = UNKNOWN; break;
16768
16769 default:
16770 gcc_unreachable ();
16771 }
16772
16773 /*
16774 * a < b =>
16775 * if (hi(a) < hi(b)) goto true;
16776 * if (hi(a) > hi(b)) goto false;
16777 * if (lo(a) < lo(b)) goto true;
16778 * false:
16779 */
16780
16781 if (code1 != UNKNOWN)
16782 ix86_expand_branch (code1, hi[0], hi[1], label);
16783 if (code2 != UNKNOWN)
16784 ix86_expand_branch (code2, hi[0], hi[1], label2);
16785
16786 ix86_expand_branch (code3, lo[0], lo[1], label);
16787
16788 if (code2 != UNKNOWN)
16789 emit_label (label2);
16790 return;
16791 }
16792
16793 default:
16794 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
16795 goto simple;
16796 }
16797 }
16798
16799 /* Split branch based on floating point condition. */
16800 void
16801 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16802 rtx target1, rtx target2, rtx tmp, rtx pushed)
16803 {
16804 rtx condition;
16805 rtx i;
16806
16807 if (target2 != pc_rtx)
16808 {
16809 rtx tmp = target2;
16810 code = reverse_condition_maybe_unordered (code);
16811 target2 = target1;
16812 target1 = tmp;
16813 }
16814
16815 condition = ix86_expand_fp_compare (code, op1, op2,
16816 tmp);
16817
16818 /* Remove pushed operand from stack. */
16819 if (pushed)
16820 ix86_free_from_memory (GET_MODE (pushed));
16821
16822 i = emit_jump_insn (gen_rtx_SET
16823 (VOIDmode, pc_rtx,
16824 gen_rtx_IF_THEN_ELSE (VOIDmode,
16825 condition, target1, target2)));
16826 if (split_branch_probability >= 0)
16827 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16828 }
16829
16830 void
16831 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16832 {
16833 rtx ret;
16834
16835 gcc_assert (GET_MODE (dest) == QImode);
16836
16837 ret = ix86_expand_compare (code, op0, op1);
16838 PUT_MODE (ret, QImode);
16839 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16840 }
16841
16842 /* Expand a comparison setting or clearing the carry flag. Return true
16843 when successful, and set *POP to the comparison operation. */
16844 static bool
16845 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16846 {
16847 enum machine_mode mode =
16848 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16849
16850 /* Do not handle double-mode compares that go through the special path. */
16851 if (mode == (TARGET_64BIT ? TImode : DImode))
16852 return false;
16853
16854 if (SCALAR_FLOAT_MODE_P (mode))
16855 {
16856 rtx compare_op, compare_seq;
16857
16858 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16859
16860 /* Shortcut: the following common codes never translate
16861 into carry-flag compares. */
16862 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16863 || code == ORDERED || code == UNORDERED)
16864 return false;
16865
16866 /* These comparisons require the zero flag; swap the operands so they no longer do. */
16867 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16868 && !TARGET_IEEE_FP)
16869 {
16870 rtx tmp = op0;
16871 op0 = op1;
16872 op1 = tmp;
16873 code = swap_condition (code);
16874 }
16875
16876 /* Try to expand the comparison and verify that we end up with a
16877 carry-flag-based comparison. This fails to be true only when
16878 we decide to expand the comparison using arithmetic, which is
16879 not a common scenario. */
16880 start_sequence ();
16881 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16882 compare_seq = get_insns ();
16883 end_sequence ();
16884
16885 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16886 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16887 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16888 else
16889 code = GET_CODE (compare_op);
16890
16891 if (code != LTU && code != GEU)
16892 return false;
16893
16894 emit_insn (compare_seq);
16895 *pop = compare_op;
16896 return true;
16897 }
16898
16899 if (!INTEGRAL_MODE_P (mode))
16900 return false;
16901
16902 switch (code)
16903 {
16904 case LTU:
16905 case GEU:
16906 break;
16907
16908 /* Convert a==0 into (unsigned)a<1. */
16909 case EQ:
16910 case NE:
16911 if (op1 != const0_rtx)
16912 return false;
16913 op1 = const1_rtx;
16914 code = (code == EQ ? LTU : GEU);
16915 break;
16916
16917 /* Convert a>b into b<a or a>=b+1. */
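/* E.g. with a constant operand, a >u 5 becomes a >=u 6 and a <=u 5
   becomes a <u 6; with a non-constant operand the operands are swapped
   instead.  */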
16918 case GTU:
16919 case LEU:
16920 if (CONST_INT_P (op1))
16921 {
16922 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
16923 /* Bail out on overflow. We could still swap the operands, but
16924 that would force loading the constant into a register. */
16925 if (op1 == const0_rtx
16926 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
16927 return false;
16928 code = (code == GTU ? GEU : LTU);
16929 }
16930 else
16931 {
16932 rtx tmp = op1;
16933 op1 = op0;
16934 op0 = tmp;
16935 code = (code == GTU ? LTU : GEU);
16936 }
16937 break;
16938
16939 /* Convert a>=0 into (unsigned)a<0x80000000. */
16940 case LT:
16941 case GE:
16942 if (mode == DImode || op1 != const0_rtx)
16943 return false;
16944 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16945 code = (code == LT ? GEU : LTU);
16946 break;
16947 case LE:
16948 case GT:
16949 if (mode == DImode || op1 != constm1_rtx)
16950 return false;
16951 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16952 code = (code == LE ? GEU : LTU);
16953 break;
16954
16955 default:
16956 return false;
16957 }
16958 /* Swapping the operands may cause a constant to appear as the first operand. */
16959 if (!nonimmediate_operand (op0, VOIDmode))
16960 {
16961 if (!can_create_pseudo_p ())
16962 return false;
16963 op0 = force_reg (mode, op0);
16964 }
16965 *pop = ix86_expand_compare (code, op0, op1);
16966 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
16967 return true;
16968 }
16969
16970 bool
16971 ix86_expand_int_movcc (rtx operands[])
16972 {
16973 enum rtx_code code = GET_CODE (operands[1]), compare_code;
16974 rtx compare_seq, compare_op;
16975 enum machine_mode mode = GET_MODE (operands[0]);
16976 bool sign_bit_compare_p = false;
16977 rtx op0 = XEXP (operands[1], 0);
16978 rtx op1 = XEXP (operands[1], 1);
16979
16980 start_sequence ();
16981 compare_op = ix86_expand_compare (code, op0, op1);
16982 compare_seq = get_insns ();
16983 end_sequence ();
16984
16985 compare_code = GET_CODE (compare_op);
16986
16987 if ((op1 == const0_rtx && (code == GE || code == LT))
16988 || (op1 == constm1_rtx && (code == GT || code == LE)))
16989 sign_bit_compare_p = true;
16990
16991 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
16992 HImode insns, we'd be swallowed in word prefix ops. */
16993
16994 if ((mode != HImode || TARGET_FAST_PREFIX)
16995 && (mode != (TARGET_64BIT ? TImode : DImode))
16996 && CONST_INT_P (operands[2])
16997 && CONST_INT_P (operands[3]))
16998 {
16999 rtx out = operands[0];
17000 HOST_WIDE_INT ct = INTVAL (operands[2]);
17001 HOST_WIDE_INT cf = INTVAL (operands[3]);
17002 HOST_WIDE_INT diff;
17003
17004 diff = ct - cf;
17005 /* Sign-bit compares are better done using shifts than by using
17006 sbb. */
17007 if (sign_bit_compare_p
17008 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17009 {
17010 /* Detect overlap between destination and compare sources. */
17011 rtx tmp = out;
17012
17013 if (!sign_bit_compare_p)
17014 {
17015 rtx flags;
17016 bool fpcmp = false;
17017
17018 compare_code = GET_CODE (compare_op);
17019
17020 flags = XEXP (compare_op, 0);
17021
17022 if (GET_MODE (flags) == CCFPmode
17023 || GET_MODE (flags) == CCFPUmode)
17024 {
17025 fpcmp = true;
17026 compare_code
17027 = ix86_fp_compare_code_to_integer (compare_code);
17028 }
17029
17030 /* To simplify the rest of the code, restrict to the GEU case. */
17031 if (compare_code == LTU)
17032 {
17033 HOST_WIDE_INT tmp = ct;
17034 ct = cf;
17035 cf = tmp;
17036 compare_code = reverse_condition (compare_code);
17037 code = reverse_condition (code);
17038 }
17039 else
17040 {
17041 if (fpcmp)
17042 PUT_CODE (compare_op,
17043 reverse_condition_maybe_unordered
17044 (GET_CODE (compare_op)));
17045 else
17046 PUT_CODE (compare_op,
17047 reverse_condition (GET_CODE (compare_op)));
17048 }
17049 diff = ct - cf;
17050
17051 if (reg_overlap_mentioned_p (out, op0)
17052 || reg_overlap_mentioned_p (out, op1))
17053 tmp = gen_reg_rtx (mode);
17054
17055 if (mode == DImode)
17056 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17057 else
17058 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17059 flags, compare_op));
17060 }
17061 else
17062 {
17063 if (code == GT || code == GE)
17064 code = reverse_condition (code);
17065 else
17066 {
17067 HOST_WIDE_INT tmp = ct;
17068 ct = cf;
17069 cf = tmp;
17070 diff = ct - cf;
17071 }
17072 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17073 }
17074
17075 if (diff == 1)
17076 {
17077 /*
17078 * cmpl op0,op1
17079 * sbbl dest,dest
17080 * [addl dest, ct]
17081 *
17082 * Size 5 - 8.
17083 */
17084 if (ct)
17085 tmp = expand_simple_binop (mode, PLUS,
17086 tmp, GEN_INT (ct),
17087 copy_rtx (tmp), 1, OPTAB_DIRECT);
17088 }
17089 else if (cf == -1)
17090 {
17091 /*
17092 * cmpl op0,op1
17093 * sbbl dest,dest
17094 * orl $ct, dest
17095 *
17096 * Size 8.
17097 */
17098 tmp = expand_simple_binop (mode, IOR,
17099 tmp, GEN_INT (ct),
17100 copy_rtx (tmp), 1, OPTAB_DIRECT);
17101 }
17102 else if (diff == -1 && ct)
17103 {
17104 /*
17105 * cmpl op0,op1
17106 * sbbl dest,dest
17107 * notl dest
17108 * [addl dest, cf]
17109 *
17110 * Size 8 - 11.
17111 */
17112 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17113 if (cf)
17114 tmp = expand_simple_binop (mode, PLUS,
17115 copy_rtx (tmp), GEN_INT (cf),
17116 copy_rtx (tmp), 1, OPTAB_DIRECT);
17117 }
17118 else
17119 {
17120 /*
17121 * cmpl op0,op1
17122 * sbbl dest,dest
17123 * [notl dest]
17124 * andl cf - ct, dest
17125 * [addl dest, ct]
17126 *
17127 * Size 8 - 11.
17128 */
17129
17130 if (cf == 0)
17131 {
17132 cf = ct;
17133 ct = 0;
17134 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17135 }
17136
17137 tmp = expand_simple_binop (mode, AND,
17138 copy_rtx (tmp),
17139 gen_int_mode (cf - ct, mode),
17140 copy_rtx (tmp), 1, OPTAB_DIRECT);
17141 if (ct)
17142 tmp = expand_simple_binop (mode, PLUS,
17143 copy_rtx (tmp), GEN_INT (ct),
17144 copy_rtx (tmp), 1, OPTAB_DIRECT);
17145 }
17146
17147 if (!rtx_equal_p (tmp, out))
17148 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
17149
17150 return true;
17151 }
17152
17153 if (diff < 0)
17154 {
17155 enum machine_mode cmp_mode = GET_MODE (op0);
17156
17157 HOST_WIDE_INT tmp;
17158 tmp = ct, ct = cf, cf = tmp;
17159 diff = -diff;
17160
17161 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17162 {
17163 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17164
17165 /* We may be reversing an unordered compare to a normal compare, which
17166 is not valid in general (we may convert a non-trapping condition
17167 to a trapping one); however, on i386 we currently emit all
17168 comparisons unordered. */
17169 compare_code = reverse_condition_maybe_unordered (compare_code);
17170 code = reverse_condition_maybe_unordered (code);
17171 }
17172 else
17173 {
17174 compare_code = reverse_condition (compare_code);
17175 code = reverse_condition (code);
17176 }
17177 }
17178
17179 compare_code = UNKNOWN;
17180 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17181 && CONST_INT_P (op1))
17182 {
17183 if (op1 == const0_rtx
17184 && (code == LT || code == GE))
17185 compare_code = code;
17186 else if (op1 == constm1_rtx)
17187 {
17188 if (code == LE)
17189 compare_code = LT;
17190 else if (code == GT)
17191 compare_code = GE;
17192 }
17193 }
17194
17195 /* Optimize dest = (op0 < 0) ? -1 : cf. */
17196 if (compare_code != UNKNOWN
17197 && GET_MODE (op0) == GET_MODE (out)
17198 && (cf == -1 || ct == -1))
17199 {
17200 /* If the lea code below could be used, only optimize
17201 if it results in a 2-insn sequence. */
17202
17203 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
17204 || diff == 3 || diff == 5 || diff == 9)
17205 || (compare_code == LT && ct == -1)
17206 || (compare_code == GE && cf == -1))
17207 {
17208 /*
17209 * notl op1 (if necessary)
17210 * sarl $31, op1
17211 * orl cf, op1
17212 */
17213 if (ct != -1)
17214 {
17215 cf = ct;
17216 ct = -1;
17217 code = reverse_condition (code);
17218 }
17219
17220 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17221
17222 out = expand_simple_binop (mode, IOR,
17223 out, GEN_INT (cf),
17224 out, 1, OPTAB_DIRECT);
17225 if (out != operands[0])
17226 emit_move_insn (operands[0], out);
17227
17228 return true;
17229 }
17230 }
17231
17232
17233 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
17234 || diff == 3 || diff == 5 || diff == 9)
17235 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
17236 && (mode != DImode
17237 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
17238 {
17239 /*
17240 * xorl dest,dest
17241 * cmpl op1,op2
17242 * setcc dest
17243 * lea cf(dest*(ct-cf)),dest
17244 *
17245 * Size 14.
17246 *
17247 * This also catches the degenerate setcc-only case.
17248 */
17249
17250 rtx tmp;
17251 int nops;
17252
17253 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17254
17255 nops = 0;
17256 /* On x86_64 the lea instruction operates on Pmode, so we need
17257 to get the arithmetic done in the proper mode to match. */
17258 if (diff == 1)
17259 tmp = copy_rtx (out);
17260 else
17261 {
17262 rtx out1;
17263 out1 = copy_rtx (out);
17264 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
17265 nops++;
17266 if (diff & 1)
17267 {
17268 tmp = gen_rtx_PLUS (mode, tmp, out1);
17269 nops++;
17270 }
17271 }
17272 if (cf != 0)
17273 {
17274 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
17275 nops++;
17276 }
17277 if (!rtx_equal_p (tmp, out))
17278 {
17279 if (nops == 1)
17280 out = force_operand (tmp, copy_rtx (out));
17281 else
17282 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
17283 }
17284 if (!rtx_equal_p (out, operands[0]))
17285 emit_move_insn (operands[0], copy_rtx (out));
17286
17287 return true;
17288 }
17289
17290 /*
17291 * General case: Jumpful:
17292 * xorl dest,dest cmpl op1, op2
17293 * cmpl op1, op2 movl ct, dest
17294 * setcc dest jcc 1f
17295 * decl dest movl cf, dest
17296 * andl (cf-ct),dest 1:
17297 * addl ct,dest
17298 *
17299 * Size 20. Size 14.
17300 *
17301 * This is reasonably steep, but branch mispredict costs are
17302 * high on modern cpus, so consider failing only if optimizing
17303 * for space.
17304 */
17305
17306 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17307 && BRANCH_COST (optimize_insn_for_speed_p (),
17308 false) >= 2)
17309 {
17310 if (cf == 0)
17311 {
17312 enum machine_mode cmp_mode = GET_MODE (op0);
17313
17314 cf = ct;
17315 ct = 0;
17316
17317 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17318 {
17319 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17320
17321 /* We may be reversing an unordered compare to a normal compare,
17322 which is not valid in general (we may convert a non-trapping
17323 condition to a trapping one); however, on i386 we currently
17324 emit all comparisons unordered. */
17325 code = reverse_condition_maybe_unordered (code);
17326 }
17327 else
17328 {
17329 code = reverse_condition (code);
17330 if (compare_code != UNKNOWN)
17331 compare_code = reverse_condition (compare_code);
17332 }
17333 }
17334
17335 if (compare_code != UNKNOWN)
17336 {
17337 /* notl op1 (if needed)
17338 sarl $31, op1
17339 andl (cf-ct), op1
17340 addl ct, op1
17341
17342 For x < 0 (resp. x <= -1) there will be no notl,
17343 so if possible swap the constants to get rid of the
17344 complement.
17345 True/false will be -1/0 while code below (store flag
17346 followed by decrement) is 0/-1, so the constants need
17347 to be exchanged once more. */
17348
17349 if (compare_code == GE || !cf)
17350 {
17351 code = reverse_condition (code);
17352 compare_code = LT;
17353 }
17354 else
17355 {
17356 HOST_WIDE_INT tmp = cf;
17357 cf = ct;
17358 ct = tmp;
17359 }
17360
17361 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17362 }
17363 else
17364 {
17365 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17366
17367 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
17368 constm1_rtx,
17369 copy_rtx (out), 1, OPTAB_DIRECT);
17370 }
17371
17372 out = expand_simple_binop (mode, AND, copy_rtx (out),
17373 gen_int_mode (cf - ct, mode),
17374 copy_rtx (out), 1, OPTAB_DIRECT);
17375 if (ct)
17376 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
17377 copy_rtx (out), 1, OPTAB_DIRECT);
17378 if (!rtx_equal_p (out, operands[0]))
17379 emit_move_insn (operands[0], copy_rtx (out));
17380
17381 return true;
17382 }
17383 }
17384
17385 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17386 {
17387 /* Try a few things more with specific constants and a variable. */
17388
17389 optab op;
17390 rtx var, orig_out, out, tmp;
17391
17392 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
17393 return false;
17394
17395 /* If one of the two operands is an interesting constant, load a 0/-1
17396 mask with the code above and mask the variable in with a logical operation. */
17397
17398 if (CONST_INT_P (operands[2]))
17399 {
17400 var = operands[3];
17401 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
17402 operands[3] = constm1_rtx, op = and_optab;
17403 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
17404 operands[3] = const0_rtx, op = ior_optab;
17405 else
17406 return false;
17407 }
17408 else if (CONST_INT_P (operands[3]))
17409 {
17410 var = operands[2];
17411 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
17412 operands[2] = constm1_rtx, op = and_optab;
17413 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
17414 operands[2] = const0_rtx, op = ior_optab;
17415 else
17416 return false;
17417 }
17418 else
17419 return false;
17420
17421 orig_out = operands[0];
17422 tmp = gen_reg_rtx (mode);
17423 operands[0] = tmp;
17424
17425 /* Recurse to get the constant loaded. */
17426 if (ix86_expand_int_movcc (operands) == 0)
17427 return false;
17428
17429 /* Mask in the interesting variable. */
17430 out = expand_binop (mode, op, var, tmp, orig_out, 0,
17431 OPTAB_WIDEN);
17432 if (!rtx_equal_p (out, orig_out))
17433 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
17434
17435 return true;
17436 }
17437
17438 /*
17439 * For comparison with above,
17440 *
17441 * movl cf,dest
17442 * movl ct,tmp
17443 * cmpl op1,op2
17444 * cmovcc tmp,dest
17445 *
17446 * Size 15.
17447 */
17448
17449 if (! nonimmediate_operand (operands[2], mode))
17450 operands[2] = force_reg (mode, operands[2]);
17451 if (! nonimmediate_operand (operands[3], mode))
17452 operands[3] = force_reg (mode, operands[3]);
17453
17454 if (! register_operand (operands[2], VOIDmode)
17455 && (mode == QImode
17456 || ! register_operand (operands[3], VOIDmode)))
17457 operands[2] = force_reg (mode, operands[2]);
17458
17459 if (mode == QImode
17460 && ! register_operand (operands[3], VOIDmode))
17461 operands[3] = force_reg (mode, operands[3]);
17462
17463 emit_insn (compare_seq);
17464 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17465 gen_rtx_IF_THEN_ELSE (mode,
17466 compare_op, operands[2],
17467 operands[3])));
17468 return true;
17469 }
17470
17471 /* Swap, force into registers, or otherwise massage the two operands
17472 to an sse comparison with a mask result. Thus we differ a bit from
17473 ix86_prepare_fp_compare_args which expects to produce a flags result.
17474
17475 The DEST operand exists to help determine whether to commute commutative
17476 operators. The POP0/POP1 operands are updated in place. The new
17477 comparison code is returned, or UNKNOWN if not implementable. */
17478
17479 static enum rtx_code
17480 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
17481 rtx *pop0, rtx *pop1)
17482 {
17483 rtx tmp;
17484
17485 switch (code)
17486 {
17487 case LTGT:
17488 case UNEQ:
17489 /* We have no LTGT as an operator. We could implement it with
17490 NE & ORDERED, but this requires an extra temporary. It's
17491 not clear that it's worth it. */
17492 return UNKNOWN;
17493
17494 case LT:
17495 case LE:
17496 case UNGT:
17497 case UNGE:
17498 /* These are supported directly. */
17499 break;
17500
17501 case EQ:
17502 case NE:
17503 case UNORDERED:
17504 case ORDERED:
17505 /* For commutative operators, try to canonicalize the destination
17506 operand to be first in the comparison - this helps reload to
17507 avoid extra moves. */
17508 if (!dest || !rtx_equal_p (dest, *pop1))
17509 break;
17510 /* FALLTHRU */
17511
17512 case GE:
17513 case GT:
17514 case UNLE:
17515 case UNLT:
17516 /* These are not supported directly. Swap the comparison operands
17517 to transform into something that is supported. */
17518 tmp = *pop0;
17519 *pop0 = *pop1;
17520 *pop1 = tmp;
17521 code = swap_condition (code);
17522 break;
17523
17524 default:
17525 gcc_unreachable ();
17526 }
17527
17528 return code;
17529 }
17530
17531 /* Detect conditional moves that exactly match min/max operational
17532 semantics. Note that this is IEEE safe, as long as we don't
17533 interchange the operands.
17534
17535 Returns FALSE if this conditional move doesn't match a MIN/MAX,
17536 and TRUE if the operation is successful and instructions are emitted. */
17537
17538 static bool
17539 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
17540 rtx cmp_op1, rtx if_true, rtx if_false)
17541 {
17542 enum machine_mode mode;
17543 bool is_min;
17544 rtx tmp;
17545
17546 if (code == LT)
17547 ;
17548 else if (code == UNGE)
17549 {
17550 tmp = if_true;
17551 if_true = if_false;
17552 if_false = tmp;
17553 }
17554 else
17555 return false;
17556
17557 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
17558 is_min = true;
17559 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
17560 is_min = false;
17561 else
17562 return false;
17563
17564 mode = GET_MODE (dest);
17565
17566 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
17567 but MODE may be a vector mode and thus not appropriate. */
17568 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
17569 {
17570 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
17571 rtvec v;
17572
17573 if_true = force_reg (mode, if_true);
17574 v = gen_rtvec (2, if_true, if_false);
17575 tmp = gen_rtx_UNSPEC (mode, v, u);
17576 }
17577 else
17578 {
17579 code = is_min ? SMIN : SMAX;
17580 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
17581 }
17582
17583 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
17584 return true;
17585 }
17586
17587 /* Expand an sse vector comparison. Return the register with the result. */
17588
17589 static rtx
17590 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
17591 rtx op_true, rtx op_false)
17592 {
17593 enum machine_mode mode = GET_MODE (dest);
17594 rtx x;
17595
17596 cmp_op0 = force_reg (mode, cmp_op0);
17597 if (!nonimmediate_operand (cmp_op1, mode))
17598 cmp_op1 = force_reg (mode, cmp_op1);
17599
17600 if (optimize
17601 || reg_overlap_mentioned_p (dest, op_true)
17602 || reg_overlap_mentioned_p (dest, op_false))
17603 dest = gen_reg_rtx (mode);
17604
17605 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
17606 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17607
17608 return dest;
17609 }
17610
17611 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
17612 operations. This is used for both scalar and vector conditional moves. */
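/* In the general case the selection is done with three logical ops,
   dest = (cmp & op_true) | (~cmp & op_false), which relies on CMP being
   an all-ones/all-zeros mask in each element.  */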
17613
17614 static void
17615 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
17616 {
17617 enum machine_mode mode = GET_MODE (dest);
17618 rtx t2, t3, x;
17619
17620 if (op_false == CONST0_RTX (mode))
17621 {
17622 op_true = force_reg (mode, op_true);
17623 x = gen_rtx_AND (mode, cmp, op_true);
17624 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17625 }
17626 else if (op_true == CONST0_RTX (mode))
17627 {
17628 op_false = force_reg (mode, op_false);
17629 x = gen_rtx_NOT (mode, cmp);
17630 x = gen_rtx_AND (mode, x, op_false);
17631 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17632 }
17633 else if (TARGET_XOP)
17634 {
17635 rtx pcmov = gen_rtx_SET (mode, dest,
17636 gen_rtx_IF_THEN_ELSE (mode, cmp,
17637 op_true,
17638 op_false));
17639 emit_insn (pcmov);
17640 }
17641 else
17642 {
17643 op_true = force_reg (mode, op_true);
17644 op_false = force_reg (mode, op_false);
17645
17646 t2 = gen_reg_rtx (mode);
17647 if (optimize)
17648 t3 = gen_reg_rtx (mode);
17649 else
17650 t3 = dest;
17651
17652 x = gen_rtx_AND (mode, op_true, cmp);
17653 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
17654
17655 x = gen_rtx_NOT (mode, cmp);
17656 x = gen_rtx_AND (mode, x, op_false);
17657 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
17658
17659 x = gen_rtx_IOR (mode, t3, t2);
17660 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17661 }
17662 }
17663
17664 /* Expand a floating-point conditional move. Return true if successful. */
17665
17666 bool
17667 ix86_expand_fp_movcc (rtx operands[])
17668 {
17669 enum machine_mode mode = GET_MODE (operands[0]);
17670 enum rtx_code code = GET_CODE (operands[1]);
17671 rtx tmp, compare_op;
17672 rtx op0 = XEXP (operands[1], 0);
17673 rtx op1 = XEXP (operands[1], 1);
17674
17675 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17676 {
17677 enum machine_mode cmode;
17678
17679 /* Since we've no cmove for sse registers, don't force bad register
17680 allocation just to gain access to it. Deny movcc when the
17681 comparison mode doesn't match the move mode. */
17682 cmode = GET_MODE (op0);
17683 if (cmode == VOIDmode)
17684 cmode = GET_MODE (op1);
17685 if (cmode != mode)
17686 return false;
17687
17688 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
17689 if (code == UNKNOWN)
17690 return false;
17691
17692 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
17693 operands[2], operands[3]))
17694 return true;
17695
17696 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
17697 operands[2], operands[3]);
17698 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
17699 return true;
17700 }
17701
17702 /* The floating point conditional move instructions don't directly
17703 support conditions resulting from a signed integer comparison. */
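/* In that case the condition is materialized with setcc into a QImode
   register below, and fcmov tests that register against zero instead.  */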
17704
17705 compare_op = ix86_expand_compare (code, op0, op1);
17706 if (!fcmov_comparison_operator (compare_op, VOIDmode))
17707 {
17708 tmp = gen_reg_rtx (QImode);
17709 ix86_expand_setcc (tmp, code, op0, op1);
17710
17711 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
17712 }
17713
17714 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17715 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17716 operands[2], operands[3])));
17717
17718 return true;
17719 }
17720
17721 /* Expand a floating-point vector conditional move; a vcond operation
17722 rather than a movcc operation. */
17723
17724 bool
17725 ix86_expand_fp_vcond (rtx operands[])
17726 {
17727 enum rtx_code code = GET_CODE (operands[3]);
17728 rtx cmp;
17729
17730 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17731 &operands[4], &operands[5]);
17732 if (code == UNKNOWN)
17733 return false;
17734
17735 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17736 operands[5], operands[1], operands[2]))
17737 return true;
17738
17739 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17740 operands[1], operands[2]);
17741 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17742 return true;
17743 }
17744
17745 /* Expand a signed/unsigned integral vector conditional move. */
17746
17747 bool
17748 ix86_expand_int_vcond (rtx operands[])
17749 {
17750 enum machine_mode mode = GET_MODE (operands[0]);
17751 enum rtx_code code = GET_CODE (operands[3]);
17752 bool negate = false;
17753 rtx x, cop0, cop1;
17754
17755 cop0 = operands[4];
17756 cop1 = operands[5];
17757
17758 /* XOP supports all of the comparisons on all vector int types. */
17759 if (!TARGET_XOP)
17760 {
17761 /* Canonicalize the comparison to EQ, GT, GTU. */
17762 switch (code)
17763 {
17764 case EQ:
17765 case GT:
17766 case GTU:
17767 break;
17768
17769 case NE:
17770 case LE:
17771 case LEU:
17772 code = reverse_condition (code);
17773 negate = true;
17774 break;
17775
17776 case GE:
17777 case GEU:
17778 code = reverse_condition (code);
17779 negate = true;
17780 /* FALLTHRU */
17781
17782 case LT:
17783 case LTU:
17784 code = swap_condition (code);
17785 x = cop0, cop0 = cop1, cop1 = x;
17786 break;
17787
17788 default:
17789 gcc_unreachable ();
17790 }
17791
17792 /* Only SSE4.1/SSE4.2 supports V2DImode. */
17793 if (mode == V2DImode)
17794 {
17795 switch (code)
17796 {
17797 case EQ:
17798 /* SSE4.1 supports EQ. */
17799 if (!TARGET_SSE4_1)
17800 return false;
17801 break;
17802
17803 case GT:
17804 case GTU:
17805 /* SSE4.2 supports GT/GTU. */
17806 if (!TARGET_SSE4_2)
17807 return false;
17808 break;
17809
17810 default:
17811 gcc_unreachable ();
17812 }
17813 }
17814
17815 /* Unsigned parallel compare is not supported by the hardware.
17816 Play some tricks to turn this into a signed comparison
17817 against 0. */
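/* For V4SI/V2DI this is done by subtracting the sign-bit constant from
   both operands, turning a >u b into a signed comparison of the biased
   values; for V16QI/V8HI an unsigned saturating subtraction is used and
   the result compared against zero.  */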
17818 if (code == GTU)
17819 {
17820 cop0 = force_reg (mode, cop0);
17821
17822 switch (mode)
17823 {
17824 case V4SImode:
17825 case V2DImode:
17826 {
17827 rtx t1, t2, mask;
17828 rtx (*gen_sub3) (rtx, rtx, rtx);
17829
17830 /* Subtract (-(INT MAX) - 1) from both operands to make
17831 them signed. */
17832 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
17833 true, false);
17834 gen_sub3 = (mode == V4SImode
17835 ? gen_subv4si3 : gen_subv2di3);
17836 t1 = gen_reg_rtx (mode);
17837 emit_insn (gen_sub3 (t1, cop0, mask));
17838
17839 t2 = gen_reg_rtx (mode);
17840 emit_insn (gen_sub3 (t2, cop1, mask));
17841
17842 cop0 = t1;
17843 cop1 = t2;
17844 code = GT;
17845 }
17846 break;
17847
17848 case V16QImode:
17849 case V8HImode:
17850 /* Perform a parallel unsigned saturating subtraction. */
17851 x = gen_reg_rtx (mode);
17852 emit_insn (gen_rtx_SET (VOIDmode, x,
17853 gen_rtx_US_MINUS (mode, cop0, cop1)));
17854
17855 cop0 = x;
17856 cop1 = CONST0_RTX (mode);
17857 code = EQ;
17858 negate = !negate;
17859 break;
17860
17861 default:
17862 gcc_unreachable ();
17863 }
17864 }
17865 }
17866
17867 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17868 operands[1+negate], operands[2-negate]);
17869
17870 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17871 operands[2-negate]);
17872 return true;
17873 }
17874
17875 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17876 true if we should do zero extension, else sign extension. HIGH_P is
17877 true if we want the N/2 high elements, else the low elements. */
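/* The extension is performed by interleaving the source with either a
   zero vector (zero extension) or with the mask 0 > OP[1] computed below,
   which is all-ones exactly in the negative elements (sign extension).  */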
17878
17879 void
17880 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17881 {
17882 enum machine_mode imode = GET_MODE (operands[1]);
17883 rtx (*unpack)(rtx, rtx, rtx);
17884 rtx se, dest;
17885
17886 switch (imode)
17887 {
17888 case V16QImode:
17889 if (high_p)
17890 unpack = gen_vec_interleave_highv16qi;
17891 else
17892 unpack = gen_vec_interleave_lowv16qi;
17893 break;
17894 case V8HImode:
17895 if (high_p)
17896 unpack = gen_vec_interleave_highv8hi;
17897 else
17898 unpack = gen_vec_interleave_lowv8hi;
17899 break;
17900 case V4SImode:
17901 if (high_p)
17902 unpack = gen_vec_interleave_highv4si;
17903 else
17904 unpack = gen_vec_interleave_lowv4si;
17905 break;
17906 default:
17907 gcc_unreachable ();
17908 }
17909
17910 dest = gen_lowpart (imode, operands[0]);
17911
17912 if (unsigned_p)
17913 se = force_reg (imode, CONST0_RTX (imode));
17914 else
17915 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
17916 operands[1], pc_rtx, pc_rtx);
17917
17918 emit_insn (unpack (dest, operands[1], se));
17919 }
17920
17921 /* This function performs the same task as ix86_expand_sse_unpack,
17922 but with SSE4.1 instructions. */
17923
17924 void
17925 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17926 {
17927 enum machine_mode imode = GET_MODE (operands[1]);
17928 rtx (*unpack)(rtx, rtx);
17929 rtx src, dest;
17930
17931 switch (imode)
17932 {
17933 case V16QImode:
17934 if (unsigned_p)
17935 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
17936 else
17937 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
17938 break;
17939 case V8HImode:
17940 if (unsigned_p)
17941 unpack = gen_sse4_1_zero_extendv4hiv4si2;
17942 else
17943 unpack = gen_sse4_1_sign_extendv4hiv4si2;
17944 break;
17945 case V4SImode:
17946 if (unsigned_p)
17947 unpack = gen_sse4_1_zero_extendv2siv2di2;
17948 else
17949 unpack = gen_sse4_1_sign_extendv2siv2di2;
17950 break;
17951 default:
17952 gcc_unreachable ();
17953 }
17954
17955 dest = operands[0];
17956 if (high_p)
17957 {
17958 /* Shift higher 8 bytes to lower 8 bytes. */
17959 src = gen_reg_rtx (imode);
17960 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
17961 gen_lowpart (V1TImode, operands[1]),
17962 GEN_INT (64)));
17963 }
17964 else
17965 src = operands[1];
17966
17967 emit_insn (unpack (dest, src));
17968 }
17969
17970 /* Expand a conditional increment or decrement using adc/sbb instructions.
17971 The default case using setcc followed by a conditional move can be
17972 done by generic code. */
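/* E.g. x += (a <u b) can be emitted as cmp a,b ; adc x,0 and
   x -= (a <u b) as cmp a,b ; sbb x,0; operand 3 (const1_rtx or
   constm1_rtx) selects between the increment and decrement forms.  */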
17973 bool
17974 ix86_expand_int_addcc (rtx operands[])
17975 {
17976 enum rtx_code code = GET_CODE (operands[1]);
17977 rtx flags;
17978 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17979 rtx compare_op;
17980 rtx val = const0_rtx;
17981 bool fpcmp = false;
17982 enum machine_mode mode;
17983 rtx op0 = XEXP (operands[1], 0);
17984 rtx op1 = XEXP (operands[1], 1);
17985
17986 if (operands[3] != const1_rtx
17987 && operands[3] != constm1_rtx)
17988 return false;
17989 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17990 return false;
17991 code = GET_CODE (compare_op);
17992
17993 flags = XEXP (compare_op, 0);
17994
17995 if (GET_MODE (flags) == CCFPmode
17996 || GET_MODE (flags) == CCFPUmode)
17997 {
17998 fpcmp = true;
17999 code = ix86_fp_compare_code_to_integer (code);
18000 }
18001
18002 if (code != LTU)
18003 {
18004 val = constm1_rtx;
18005 if (fpcmp)
18006 PUT_CODE (compare_op,
18007 reverse_condition_maybe_unordered
18008 (GET_CODE (compare_op)));
18009 else
18010 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18011 }
18012
18013 mode = GET_MODE (operands[0]);
18014
18015 /* Construct either adc or sbb insn. */
18016 if ((code == LTU) == (operands[3] == constm1_rtx))
18017 {
18018 switch (mode)
18019 {
18020 case QImode:
18021 insn = gen_subqi3_carry;
18022 break;
18023 case HImode:
18024 insn = gen_subhi3_carry;
18025 break;
18026 case SImode:
18027 insn = gen_subsi3_carry;
18028 break;
18029 case DImode:
18030 insn = gen_subdi3_carry;
18031 break;
18032 default:
18033 gcc_unreachable ();
18034 }
18035 }
18036 else
18037 {
18038 switch (mode)
18039 {
18040 case QImode:
18041 insn = gen_addqi3_carry;
18042 break;
18043 case HImode:
18044 insn = gen_addhi3_carry;
18045 break;
18046 case SImode:
18047 insn = gen_addsi3_carry;
18048 break;
18049 case DImode:
18050 insn = gen_adddi3_carry;
18051 break;
18052 default:
18053 gcc_unreachable ();
18054 }
18055 }
18056 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
18057
18058 return true;
18059 }
18060
18061
18062 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
18063 but works for floating-point parameters and non-offsettable memories.
18064 For pushes, it returns just stack offsets; the values will be saved
18065 in the right order. At most four parts are generated. */
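/* For example, on ia32 a DFmode operand splits into two SImode parts,
   XFmode into three and TFmode into four, while in 64-bit mode XFmode
   and TFmode split into a DImode part plus an upper part.  */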
18066
18067 static int
18068 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18069 {
18070 int size;
18071
18072 if (!TARGET_64BIT)
18073 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18074 else
18075 size = (GET_MODE_SIZE (mode) + 4) / 8;
18076
18077 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18078 gcc_assert (size >= 2 && size <= 4);
18079
18080 /* Optimize constant pool references to immediates. This is used by fp
18081 moves, which force all constants to memory to allow combining. */
18082 if (MEM_P (operand) && MEM_READONLY_P (operand))
18083 {
18084 rtx tmp = maybe_get_pool_constant (operand);
18085 if (tmp)
18086 operand = tmp;
18087 }
18088
18089 if (MEM_P (operand) && !offsettable_memref_p (operand))
18090 {
18091 /* The only non-offsettable memories we handle are pushes. */
18092 int ok = push_operand (operand, VOIDmode);
18093
18094 gcc_assert (ok);
18095
18096 operand = copy_rtx (operand);
18097 PUT_MODE (operand, Pmode);
18098 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18099 return size;
18100 }
18101
18102 if (GET_CODE (operand) == CONST_VECTOR)
18103 {
18104 enum machine_mode imode = int_mode_for_mode (mode);
18105 /* Caution: if we looked through a constant pool memory above,
18106 the operand may actually have a different mode now. That's
18107 ok, since we want to pun this all the way back to an integer. */
18108 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18109 gcc_assert (operand != NULL);
18110 mode = imode;
18111 }
18112
18113 if (!TARGET_64BIT)
18114 {
18115 if (mode == DImode)
18116 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18117 else
18118 {
18119 int i;
18120
18121 if (REG_P (operand))
18122 {
18123 gcc_assert (reload_completed);
18124 for (i = 0; i < size; i++)
18125 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18126 }
18127 else if (offsettable_memref_p (operand))
18128 {
18129 operand = adjust_address (operand, SImode, 0);
18130 parts[0] = operand;
18131 for (i = 1; i < size; i++)
18132 parts[i] = adjust_address (operand, SImode, 4 * i);
18133 }
18134 else if (GET_CODE (operand) == CONST_DOUBLE)
18135 {
18136 REAL_VALUE_TYPE r;
18137 long l[4];
18138
18139 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18140 switch (mode)
18141 {
18142 case TFmode:
18143 real_to_target (l, &r, mode);
18144 parts[3] = gen_int_mode (l[3], SImode);
18145 parts[2] = gen_int_mode (l[2], SImode);
18146 break;
18147 case XFmode:
18148 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18149 parts[2] = gen_int_mode (l[2], SImode);
18150 break;
18151 case DFmode:
18152 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18153 break;
18154 default:
18155 gcc_unreachable ();
18156 }
18157 parts[1] = gen_int_mode (l[1], SImode);
18158 parts[0] = gen_int_mode (l[0], SImode);
18159 }
18160 else
18161 gcc_unreachable ();
18162 }
18163 }
18164 else
18165 {
18166 if (mode == TImode)
18167 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18168 if (mode == XFmode || mode == TFmode)
18169 {
18170 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
18171 if (REG_P (operand))
18172 {
18173 gcc_assert (reload_completed);
18174 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
18175 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
18176 }
18177 else if (offsettable_memref_p (operand))
18178 {
18179 operand = adjust_address (operand, DImode, 0);
18180 parts[0] = operand;
18181 parts[1] = adjust_address (operand, upper_mode, 8);
18182 }
18183 else if (GET_CODE (operand) == CONST_DOUBLE)
18184 {
18185 REAL_VALUE_TYPE r;
18186 long l[4];
18187
18188 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18189 real_to_target (l, &r, mode);
18190
18191 /* Do not use shift by 32 to avoid warning on 32bit systems. */
18192 if (HOST_BITS_PER_WIDE_INT >= 64)
18193 parts[0]
18194 = gen_int_mode
18195 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
18196 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
18197 DImode);
18198 else
18199 parts[0] = immed_double_const (l[0], l[1], DImode);
18200
18201 if (upper_mode == SImode)
18202 parts[1] = gen_int_mode (l[2], SImode);
18203 else if (HOST_BITS_PER_WIDE_INT >= 64)
18204 parts[1]
18205 = gen_int_mode
18206 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
18207 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
18208 DImode);
18209 else
18210 parts[1] = immed_double_const (l[2], l[3], DImode);
18211 }
18212 else
18213 gcc_unreachable ();
18214 }
18215 }
18216
18217 return size;
18218 }
18219
18220 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
18221 Return false when normal moves are needed; true when all required
18222 insns have been emitted. Operands 2-4 contain the input values
18223 in the correct order; operands 5-7 contain the output values. */
18224
18225 void
18226 ix86_split_long_move (rtx operands[])
18227 {
18228 rtx part[2][4];
18229 int nparts, i, j;
18230 int push = 0;
18231 int collisions = 0;
18232 enum machine_mode mode = GET_MODE (operands[0]);
18233 bool collisionparts[4];
18234
18235 /* The DFmode expanders may ask us to move a double.
18236 For a 64-bit target this is a single move. By hiding that fact
18237 here we simplify the i386.md splitters. */
18238 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
18239 {
18240 /* Optimize constant pool references to immediates. This is used by
18241 fp moves, which force all constants to memory to allow combining. */
18242
18243 if (MEM_P (operands[1])
18244 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
18245 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
18246 operands[1] = get_pool_constant (XEXP (operands[1], 0));
18247 if (push_operand (operands[0], VOIDmode))
18248 {
18249 operands[0] = copy_rtx (operands[0]);
18250 PUT_MODE (operands[0], Pmode);
18251 }
18252 else
18253 operands[0] = gen_lowpart (DImode, operands[0]);
18254 operands[1] = gen_lowpart (DImode, operands[1]);
18255 emit_move_insn (operands[0], operands[1]);
18256 return;
18257 }
18258
18259 /* The only non-offsettable memory we handle is push. */
18260 if (push_operand (operands[0], VOIDmode))
18261 push = 1;
18262 else
18263 gcc_assert (!MEM_P (operands[0])
18264 || offsettable_memref_p (operands[0]));
18265
18266 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
18267 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
18268
18269 /* When emitting a push, take care of source operands on the stack. */
18270 if (push && MEM_P (operands[1])
18271 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
18272 {
18273 rtx src_base = XEXP (part[1][nparts - 1], 0);
18274
18275 /* Compensate for the stack decrement by 4. */
18276 if (!TARGET_64BIT && nparts == 3
18277 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
18278 src_base = plus_constant (src_base, 4);
18279
18280 /* src_base refers to the stack pointer and is
18281 automatically decreased by emitted push. */
18282 for (i = 0; i < nparts; i++)
18283 part[1][i] = change_address (part[1][i],
18284 GET_MODE (part[1][i]), src_base);
18285 }
18286
18287 /* We need to do the copy in the right order in case an address register
18288 of the source overlaps the destination. */
18289 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
18290 {
18291 rtx tmp;
18292
18293 for (i = 0; i < nparts; i++)
18294 {
18295 collisionparts[i]
18296 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
18297 if (collisionparts[i])
18298 collisions++;
18299 }
18300
18301 /* Collision in the middle part can be handled by reordering. */
18302 if (collisions == 1 && nparts == 3 && collisionparts [1])
18303 {
18304 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18305 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18306 }
18307 else if (collisions == 1
18308 && nparts == 4
18309 && (collisionparts [1] || collisionparts [2]))
18310 {
18311 if (collisionparts [1])
18312 {
18313 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18314 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18315 }
18316 else
18317 {
18318 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
18319 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
18320 }
18321 }
18322
18323 /* If there are more collisions, we can't handle it by reordering.
18324 Do an lea to the last part and use only one colliding move. */
18325 else if (collisions > 1)
18326 {
18327 rtx base;
18328
18329 collisions = 1;
18330
18331 base = part[0][nparts - 1];
18332
18333 /* Handle the case when the last part isn't valid for lea.
18334 Happens in 64-bit mode storing the 12-byte XFmode. */
18335 if (GET_MODE (base) != Pmode)
18336 base = gen_rtx_REG (Pmode, REGNO (base));
18337
18338 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
18339 part[1][0] = replace_equiv_address (part[1][0], base);
18340 for (i = 1; i < nparts; i++)
18341 {
18342 tmp = plus_constant (base, UNITS_PER_WORD * i);
18343 part[1][i] = replace_equiv_address (part[1][i], tmp);
18344 }
18345 }
18346 }
18347
18348 if (push)
18349 {
18350 if (!TARGET_64BIT)
18351 {
18352 if (nparts == 3)
18353 {
18354 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
18355 emit_insn (gen_addsi3 (stack_pointer_rtx,
18356 stack_pointer_rtx, GEN_INT (-4)));
18357 emit_move_insn (part[0][2], part[1][2]);
18358 }
18359 else if (nparts == 4)
18360 {
18361 emit_move_insn (part[0][3], part[1][3]);
18362 emit_move_insn (part[0][2], part[1][2]);
18363 }
18364 }
18365 else
18366 {
18367 /* In 64-bit mode we don't have a 32-bit push available.  If this is
18368    a register, that is OK - we will just use the larger counterpart.  We also
18369    retype memory - these come from an attempt to avoid a REX prefix on
18370    moving the second half of a TFmode value. */
18371 if (GET_MODE (part[1][1]) == SImode)
18372 {
18373 switch (GET_CODE (part[1][1]))
18374 {
18375 case MEM:
18376 part[1][1] = adjust_address (part[1][1], DImode, 0);
18377 break;
18378
18379 case REG:
18380 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
18381 break;
18382
18383 default:
18384 gcc_unreachable ();
18385 }
18386
18387 if (GET_MODE (part[1][0]) == SImode)
18388 part[1][0] = part[1][1];
18389 }
18390 }
18391 emit_move_insn (part[0][1], part[1][1]);
18392 emit_move_insn (part[0][0], part[1][0]);
18393 return;
18394 }
18395
18396 /* Choose correct order to not overwrite the source before it is copied. */
18397 if ((REG_P (part[0][0])
18398 && REG_P (part[1][1])
18399 && (REGNO (part[0][0]) == REGNO (part[1][1])
18400 || (nparts == 3
18401 && REGNO (part[0][0]) == REGNO (part[1][2]))
18402 || (nparts == 4
18403 && REGNO (part[0][0]) == REGNO (part[1][3]))))
18404 || (collisions > 0
18405 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
18406 {
18407 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
18408 {
18409 operands[2 + i] = part[0][j];
18410 operands[6 + i] = part[1][j];
18411 }
18412 }
18413 else
18414 {
18415 for (i = 0; i < nparts; i++)
18416 {
18417 operands[2 + i] = part[0][i];
18418 operands[6 + i] = part[1][i];
18419 }
18420 }
18421
18422 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
18423 if (optimize_insn_for_size_p ())
18424 {
18425 for (j = 0; j < nparts - 1; j++)
18426 if (CONST_INT_P (operands[6 + j])
18427 && operands[6 + j] != const0_rtx
18428 && REG_P (operands[2 + j]))
18429 for (i = j; i < nparts - 1; i++)
18430 if (CONST_INT_P (operands[7 + i])
18431 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
18432 operands[7 + i] = operands[2 + j];
18433 }
18434
18435 for (i = 0; i < nparts; i++)
18436 emit_move_insn (operands[2 + i], operands[6 + i]);
18437
18438 return;
18439 }
18440
18441 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
18442 left shift by a constant, either using a single shift or
18443 a sequence of add instructions. */
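/* For example, when the half-word registers are SImode and COUNT is 2,
   and two additions are cheaper than a constant shift (and we are not
   optimizing for size), the shift is emitted as

       addl %eax, %eax
       addl %eax, %eax

   instead of "shll $2, %eax".  The register name is illustrative only;
   OPERAND may be any suitable register.  */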
18444
18445 static void
18446 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
18447 {
18448 rtx (*insn)(rtx, rtx, rtx);
18449
18450 if (count == 1
18451 || (count * ix86_cost->add <= ix86_cost->shift_const
18452 && !optimize_insn_for_size_p ()))
18453 {
18454 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
18455 while (count-- > 0)
18456 emit_insn (insn (operand, operand, operand));
18457 }
18458 else
18459 {
18460 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
18461 emit_insn (insn (operand, operand, GEN_INT (count)));
18462 }
18463 }
18464
18465 void
18466 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
18467 {
18468 rtx (*gen_ashl3)(rtx, rtx, rtx);
18469 rtx (*gen_shld)(rtx, rtx, rtx);
18470 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18471
18472 rtx low[2], high[2];
18473 int count;
18474
18475 if (CONST_INT_P (operands[2]))
18476 {
18477 split_double_mode (mode, operands, 2, low, high);
18478 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18479
18480 if (count >= half_width)
18481 {
18482 emit_move_insn (high[0], low[1]);
18483 emit_move_insn (low[0], const0_rtx);
18484
18485 if (count > half_width)
18486 ix86_expand_ashl_const (high[0], count - half_width, mode);
18487 }
18488 else
18489 {
18490 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
18491
18492 if (!rtx_equal_p (operands[0], operands[1]))
18493 emit_move_insn (operands[0], operands[1]);
18494
18495 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
18496 ix86_expand_ashl_const (low[0], count, mode);
18497 }
18498 return;
18499 }
18500
18501 split_double_mode (mode, operands, 1, low, high);
18502
18503 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
18504
18505 if (operands[1] == const1_rtx)
18506 {
18507 /* Assuming we've chosen QImode-capable registers, then 1 << N
18508    can be done with two 32/64-bit shifts, no branches, no cmoves. */
18509 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
18510 {
18511 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
18512
18513 ix86_expand_clear (low[0]);
18514 ix86_expand_clear (high[0]);
18515 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
18516
18517 d = gen_lowpart (QImode, low[0]);
18518 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18519 s = gen_rtx_EQ (QImode, flags, const0_rtx);
18520 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18521
18522 d = gen_lowpart (QImode, high[0]);
18523 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18524 s = gen_rtx_NE (QImode, flags, const0_rtx);
18525 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18526 }
18527
18528 /* Otherwise, we can get the same results by manually performing
18529 a bit extract operation on bit 5/6, and then performing the two
18530 shifts. The two methods of getting 0/1 into low/high are exactly
18531 the same size. Avoiding the shift in the bit extract case helps
18532 pentium4 a bit; no one else seems to care much either way. */
18533 else
18534 {
18535 enum machine_mode half_mode;
18536 rtx (*gen_lshr3)(rtx, rtx, rtx);
18537 rtx (*gen_and3)(rtx, rtx, rtx);
18538 rtx (*gen_xor3)(rtx, rtx, rtx);
18539 HOST_WIDE_INT bits;
18540 rtx x;
18541
18542 if (mode == DImode)
18543 {
18544 half_mode = SImode;
18545 gen_lshr3 = gen_lshrsi3;
18546 gen_and3 = gen_andsi3;
18547 gen_xor3 = gen_xorsi3;
18548 bits = 5;
18549 }
18550 else
18551 {
18552 half_mode = DImode;
18553 gen_lshr3 = gen_lshrdi3;
18554 gen_and3 = gen_anddi3;
18555 gen_xor3 = gen_xordi3;
18556 bits = 6;
18557 }
18558
18559 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
18560 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
18561 else
18562 x = gen_lowpart (half_mode, operands[2]);
18563 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
18564
18565 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
18566 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
18567 emit_move_insn (low[0], high[0]);
18568 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
18569 }
18570
18571 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
18572 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
18573 return;
18574 }
18575
18576 if (operands[1] == constm1_rtx)
18577 {
18578 /* For -1 << N, we can avoid the shld instruction, because we
18579 know that we're shifting 0...31/63 ones into a -1. */
18580 emit_move_insn (low[0], constm1_rtx);
18581 if (optimize_insn_for_size_p ())
18582 emit_move_insn (high[0], low[0]);
18583 else
18584 emit_move_insn (high[0], constm1_rtx);
18585 }
18586 else
18587 {
18588 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
18589
18590 if (!rtx_equal_p (operands[0], operands[1]))
18591 emit_move_insn (operands[0], operands[1]);
18592
18593 split_double_mode (mode, operands, 1, low, high);
18594 emit_insn (gen_shld (high[0], low[0], operands[2]));
18595 }
18596
18597 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
18598
18599 if (TARGET_CMOVE && scratch)
18600 {
18601 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18602 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18603
18604 ix86_expand_clear (scratch);
18605 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
18606 }
18607 else
18608 {
18609 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
18610 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
18611
18612 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
18613 }
18614 }
18615
18616 void
18617 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
18618 {
18619 rtx (*gen_ashr3)(rtx, rtx, rtx)
18620 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
18621 rtx (*gen_shrd)(rtx, rtx, rtx);
18622 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18623
18624 rtx low[2], high[2];
18625 int count;
18626
18627 if (CONST_INT_P (operands[2]))
18628 {
18629 split_double_mode (mode, operands, 2, low, high);
18630 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18631
18632 if (count == GET_MODE_BITSIZE (mode) - 1)
18633 {
18634 emit_move_insn (high[0], high[1]);
18635 emit_insn (gen_ashr3 (high[0], high[0],
18636 GEN_INT (half_width - 1)));
18637 emit_move_insn (low[0], high[0]);
18638
18639 }
18640 else if (count >= half_width)
18641 {
18642 emit_move_insn (low[0], high[1]);
18643 emit_move_insn (high[0], low[0]);
18644 emit_insn (gen_ashr3 (high[0], high[0],
18645 GEN_INT (half_width - 1)));
18646
18647 if (count > half_width)
18648 emit_insn (gen_ashr3 (low[0], low[0],
18649 GEN_INT (count - half_width)));
18650 }
18651 else
18652 {
18653 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18654
18655 if (!rtx_equal_p (operands[0], operands[1]))
18656 emit_move_insn (operands[0], operands[1]);
18657
18658 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
18659 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
18660 }
18661 }
18662 else
18663 {
18664 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18665
18666 if (!rtx_equal_p (operands[0], operands[1]))
18667 emit_move_insn (operands[0], operands[1]);
18668
18669 split_double_mode (mode, operands, 1, low, high);
18670
18671 emit_insn (gen_shrd (low[0], high[0], operands[2]));
18672 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
18673
18674 if (TARGET_CMOVE && scratch)
18675 {
18676 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18677 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18678
18679 emit_move_insn (scratch, high[0]);
18680 emit_insn (gen_ashr3 (scratch, scratch,
18681 GEN_INT (half_width - 1)));
18682 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
18683 scratch));
18684 }
18685 else
18686 {
18687 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
18688 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
18689
18690 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
18691 }
18692 }
18693 }
18694
18695 void
18696 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
18697 {
18698 rtx (*gen_lshr3)(rtx, rtx, rtx)
18699 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
18700 rtx (*gen_shrd)(rtx, rtx, rtx);
18701 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18702
18703 rtx low[2], high[2];
18704 int count;
18705
18706 if (CONST_INT_P (operands[2]))
18707 {
18708 split_double_mode (mode, operands, 2, low, high);
18709 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18710
18711 if (count >= half_width)
18712 {
18713 emit_move_insn (low[0], high[1]);
18714 ix86_expand_clear (high[0]);
18715
18716 if (count > half_width)
18717 emit_insn (gen_lshr3 (low[0], low[0],
18718 GEN_INT (count - half_width)));
18719 }
18720 else
18721 {
18722 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18723
18724 if (!rtx_equal_p (operands[0], operands[1]))
18725 emit_move_insn (operands[0], operands[1]);
18726
18727 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
18728 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
18729 }
18730 }
18731 else
18732 {
18733 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18734
18735 if (!rtx_equal_p (operands[0], operands[1]))
18736 emit_move_insn (operands[0], operands[1]);
18737
18738 split_double_mode (mode, operands, 1, low, high);
18739
18740 emit_insn (gen_shrd (low[0], high[0], operands[2]));
18741 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
18742
18743 if (TARGET_CMOVE && scratch)
18744 {
18745 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18746 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18747
18748 ix86_expand_clear (scratch);
18749 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
18750 scratch));
18751 }
18752 else
18753 {
18754 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
18755 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
18756
18757 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
18758 }
18759 }
18760 }
18761
18762 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
18763 static void
18764 predict_jump (int prob)
18765 {
18766 rtx insn = get_last_insn ();
18767 gcc_assert (JUMP_P (insn));
18768 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
18769 }
18770
18771 /* Helper function for the string operations below.  Test VARIABLE whether
18772    it is aligned to VALUE bytes.  If so, jump to the label. */
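/* For example, ix86_expand_aligntest (countreg, 4, true) emits code
   equivalent to "test $4, countreg; je <label>": the code that follows
   the call is skipped whenever bit 2 of COUNTREG is clear, and the label
   is returned so the caller can emit it after the conditional block.  */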
18773 static rtx
18774 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
18775 {
18776 rtx label = gen_label_rtx ();
18777 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
18778 if (GET_MODE (variable) == DImode)
18779 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
18780 else
18781 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
18782 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
18783 1, label);
18784 if (epilogue)
18785 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18786 else
18787 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18788 return label;
18789 }
18790
18791 /* Decrease COUNTREG by VALUE. */
18792 static void
18793 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
18794 {
18795 rtx (*gen_add)(rtx, rtx, rtx)
18796 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
18797
18798 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
18799 }
18800
18801 /* Zero extend possibly SImode EXP to Pmode register. */
18802 rtx
18803 ix86_zero_extend_to_Pmode (rtx exp)
18804 {
18805 rtx r;
18806 if (GET_MODE (exp) == VOIDmode)
18807 return force_reg (Pmode, exp);
18808 if (GET_MODE (exp) == Pmode)
18809 return copy_to_mode_reg (Pmode, exp);
18810 r = gen_reg_rtx (Pmode);
18811 emit_insn (gen_zero_extendsidi2 (r, exp));
18812 return r;
18813 }
18814
18815 /* Divide COUNTREG by SCALE. */
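/* For example, with SCALE == 4 a compile-time count of 22 becomes
   GEN_INT (5), while a runtime count held in a register is divided by
   emitting a logical shift right by 2.  SCALE is expected to be a power
   of two.  */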
18816 static rtx
18817 scale_counter (rtx countreg, int scale)
18818 {
18819 rtx sc;
18820
18821 if (scale == 1)
18822 return countreg;
18823 if (CONST_INT_P (countreg))
18824 return GEN_INT (INTVAL (countreg) / scale);
18825 gcc_assert (REG_P (countreg));
18826
18827 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
18828 GEN_INT (exact_log2 (scale)),
18829 NULL, 1, OPTAB_DIRECT);
18830 return sc;
18831 }
18832
18833 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
18834 DImode for constant loop counts. */
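/* For example, a constant count of 1000 yields SImode even on a 64-bit
   target, a constant count that does not fit in 32 bits yields DImode,
   while a count already held in a register keeps that register's mode.  */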
18835
18836 static enum machine_mode
18837 counter_mode (rtx count_exp)
18838 {
18839 if (GET_MODE (count_exp) != VOIDmode)
18840 return GET_MODE (count_exp);
18841 if (!CONST_INT_P (count_exp))
18842 return Pmode;
18843 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
18844 return DImode;
18845 return SImode;
18846 }
18847
18848 /* When SRCPTR is non-NULL, output a simple loop that moves memory
18849    pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
18850    the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
18851    output the equivalent loop that sets memory to VALUE (supposed to be in MODE).
18852
18853    The size is rounded down to a whole number of chunks moved at once.
18854    SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info. */
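/* The emitted loop has roughly this shape (illustrative pseudocode; the
   real code works on rtl and unrolls the body UNROLL times, with
   piece_size = GET_MODE_SIZE (MODE) * UNROLL):

       size = count & ~(piece_size - 1);
       iter = 0;
     top:
       copy (or set) one unrolled group of chunks at dest + iter [and src + iter];
       iter += piece_size;
       if (iter < size) goto top;
       dest += iter;  src += iter;
*/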
18855
18856
18857 static void
18858 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
18859 rtx destptr, rtx srcptr, rtx value,
18860 rtx count, enum machine_mode mode, int unroll,
18861 int expected_size)
18862 {
18863 rtx out_label, top_label, iter, tmp;
18864 enum machine_mode iter_mode = counter_mode (count);
18865 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
18866 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
18867 rtx size;
18868 rtx x_addr;
18869 rtx y_addr;
18870 int i;
18871
18872 top_label = gen_label_rtx ();
18873 out_label = gen_label_rtx ();
18874 iter = gen_reg_rtx (iter_mode);
18875
18876 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
18877 NULL, 1, OPTAB_DIRECT);
18878 /* Those two should combine. */
18879 if (piece_size == const1_rtx)
18880 {
18881 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
18882 true, out_label);
18883 predict_jump (REG_BR_PROB_BASE * 10 / 100);
18884 }
18885 emit_move_insn (iter, const0_rtx);
18886
18887 emit_label (top_label);
18888
18889 tmp = convert_modes (Pmode, iter_mode, iter, true);
18890 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
18891 destmem = change_address (destmem, mode, x_addr);
18892
18893 if (srcmem)
18894 {
18895 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
18896 srcmem = change_address (srcmem, mode, y_addr);
18897
18898 /* When unrolling for chips that reorder memory reads and writes,
18899    we can save registers by using a single temporary.
18900    Also, using 4 temporaries is overkill in 32-bit mode. */
18901 if (!TARGET_64BIT && 0)
18902 {
18903 for (i = 0; i < unroll; i++)
18904 {
18905 if (i)
18906 {
18907 destmem =
18908 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18909 srcmem =
18910 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18911 }
18912 emit_move_insn (destmem, srcmem);
18913 }
18914 }
18915 else
18916 {
18917 rtx tmpreg[4];
18918 gcc_assert (unroll <= 4);
18919 for (i = 0; i < unroll; i++)
18920 {
18921 tmpreg[i] = gen_reg_rtx (mode);
18922 if (i)
18923 {
18924 srcmem =
18925 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18926 }
18927 emit_move_insn (tmpreg[i], srcmem);
18928 }
18929 for (i = 0; i < unroll; i++)
18930 {
18931 if (i)
18932 {
18933 destmem =
18934 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18935 }
18936 emit_move_insn (destmem, tmpreg[i]);
18937 }
18938 }
18939 }
18940 else
18941 for (i = 0; i < unroll; i++)
18942 {
18943 if (i)
18944 destmem =
18945 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18946 emit_move_insn (destmem, value);
18947 }
18948
18949 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
18950 true, OPTAB_LIB_WIDEN);
18951 if (tmp != iter)
18952 emit_move_insn (iter, tmp);
18953
18954 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
18955 true, top_label);
18956 if (expected_size != -1)
18957 {
18958 expected_size /= GET_MODE_SIZE (mode) * unroll;
18959 if (expected_size == 0)
18960 predict_jump (0);
18961 else if (expected_size > REG_BR_PROB_BASE)
18962 predict_jump (REG_BR_PROB_BASE - 1);
18963 else
18964 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
18965 }
18966 else
18967 predict_jump (REG_BR_PROB_BASE * 80 / 100);
18968 iter = ix86_zero_extend_to_Pmode (iter);
18969 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
18970 true, OPTAB_LIB_WIDEN);
18971 if (tmp != destptr)
18972 emit_move_insn (destptr, tmp);
18973 if (srcptr)
18974 {
18975 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
18976 true, OPTAB_LIB_WIDEN);
18977 if (tmp != srcptr)
18978 emit_move_insn (srcptr, tmp);
18979 }
18980 emit_label (out_label);
18981 }
18982
18983 /* Output "rep; mov" instruction.
18984    Arguments have the same meaning as for the previous function. */
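/* For example, with MODE == SImode the count register is loaded with
   COUNT / 4 and the emitted insn corresponds to "rep movsl"; the source
   and destination pointers end up advanced by 4 * countreg, which is
   what the DESTEXP/SRCEXP expressions describe.  */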
18985 static void
18986 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
18987 rtx destptr, rtx srcptr,
18988 rtx count,
18989 enum machine_mode mode)
18990 {
18991 rtx destexp;
18992 rtx srcexp;
18993 rtx countreg;
18994
18995 /* If the size is known to be a multiple of 4, it is cheaper to use 4-byte rep movs. */
18996 if (mode == QImode && CONST_INT_P (count)
18997 && !(INTVAL (count) & 3))
18998 mode = SImode;
18999
19000 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19001 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19002 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19003 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19004 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19005 if (mode != QImode)
19006 {
19007 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19008 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19009 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19010 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19011 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19012 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19013 }
19014 else
19015 {
19016 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19017 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19018 }
19019 if (CONST_INT_P (count))
19020 {
19021 count = GEN_INT (INTVAL (count)
19022 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19023 destmem = shallow_copy_rtx (destmem);
19024 srcmem = shallow_copy_rtx (srcmem);
19025 set_mem_size (destmem, count);
19026 set_mem_size (srcmem, count);
19027 }
19028 else
19029 {
19030 if (MEM_SIZE (destmem))
19031 set_mem_size (destmem, NULL_RTX);
19032 if (MEM_SIZE (srcmem))
19033 set_mem_size (srcmem, NULL_RTX);
19034 }
19035 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
19036 destexp, srcexp));
19037 }
19038
19039 /* Output "rep; stos" instruction.
19040    Arguments have the same meaning as for the previous function. */
19041 static void
19042 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19043 rtx count, enum machine_mode mode,
19044 rtx orig_value)
19045 {
19046 rtx destexp;
19047 rtx countreg;
19048
19049 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19050 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19051 value = force_reg (mode, gen_lowpart (mode, value));
19052 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19053 if (mode != QImode)
19054 {
19055 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19056 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19057 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19058 }
19059 else
19060 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19061 if (orig_value == const0_rtx && CONST_INT_P (count))
19062 {
19063 count = GEN_INT (INTVAL (count)
19064 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19065 destmem = shallow_copy_rtx (destmem);
19066 set_mem_size (destmem, count);
19067 }
19068 else if (MEM_SIZE (destmem))
19069 set_mem_size (destmem, NULL_RTX);
19070 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19071 }
19072
19073 static void
19074 emit_strmov (rtx destmem, rtx srcmem,
19075 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19076 {
19077 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19078 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19079 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19080 }
19081
19082 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
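/* For example, with a compile-time COUNT of 23 and MAX_SIZE of 16, the
   trailing 23 & 15 = 15 bytes are copied with an 8-byte move (two 4-byte
   moves on 32-bit targets), then a 4-byte, a 2-byte and a 1-byte move at
   increasing offsets.  For a runtime COUNT, a small byte loop or a series
   of alignment-test-guarded moves is emitted instead.  */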
19083 static void
19084 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19085 rtx destptr, rtx srcptr, rtx count, int max_size)
19086 {
19087 rtx src, dest;
19088 if (CONST_INT_P (count))
19089 {
19090 HOST_WIDE_INT countval = INTVAL (count);
19091 int offset = 0;
19092
19093 if ((countval & 0x10) && max_size > 16)
19094 {
19095 if (TARGET_64BIT)
19096 {
19097 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19098 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19099 }
19100 else
19101 gcc_unreachable ();
19102 offset += 16;
19103 }
19104 if ((countval & 0x08) && max_size > 8)
19105 {
19106 if (TARGET_64BIT)
19107 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19108 else
19109 {
19110 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19111 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19112 }
19113 offset += 8;
19114 }
19115 if ((countval & 0x04) && max_size > 4)
19116 {
19117 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19118 offset += 4;
19119 }
19120 if ((countval & 0x02) && max_size > 2)
19121 {
19122 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19123 offset += 2;
19124 }
19125 if ((countval & 0x01) && max_size > 1)
19126 {
19127 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19128 offset += 1;
19129 }
19130 return;
19131 }
19132 if (max_size > 8)
19133 {
19134 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19135 count, 1, OPTAB_DIRECT);
19136 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19137 count, QImode, 1, 4);
19138 return;
19139 }
19140
19141 /* When single-instruction stringops are available, we can cheaply advance
19142    the dest and src pointers.  Otherwise we save code size by maintaining an
19143    offset (zero is readily available from the preceding rep operation) and
19144    using x86 addressing modes.  */
19145 if (TARGET_SINGLE_STRINGOP)
19146 {
19147 if (max_size > 4)
19148 {
19149 rtx label = ix86_expand_aligntest (count, 4, true);
19150 src = change_address (srcmem, SImode, srcptr);
19151 dest = change_address (destmem, SImode, destptr);
19152 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19153 emit_label (label);
19154 LABEL_NUSES (label) = 1;
19155 }
19156 if (max_size > 2)
19157 {
19158 rtx label = ix86_expand_aligntest (count, 2, true);
19159 src = change_address (srcmem, HImode, srcptr);
19160 dest = change_address (destmem, HImode, destptr);
19161 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19162 emit_label (label);
19163 LABEL_NUSES (label) = 1;
19164 }
19165 if (max_size > 1)
19166 {
19167 rtx label = ix86_expand_aligntest (count, 1, true);
19168 src = change_address (srcmem, QImode, srcptr);
19169 dest = change_address (destmem, QImode, destptr);
19170 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19171 emit_label (label);
19172 LABEL_NUSES (label) = 1;
19173 }
19174 }
19175 else
19176 {
19177 rtx offset = force_reg (Pmode, const0_rtx);
19178 rtx tmp;
19179
19180 if (max_size > 4)
19181 {
19182 rtx label = ix86_expand_aligntest (count, 4, true);
19183 src = change_address (srcmem, SImode, srcptr);
19184 dest = change_address (destmem, SImode, destptr);
19185 emit_move_insn (dest, src);
19186 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
19187 true, OPTAB_LIB_WIDEN);
19188 if (tmp != offset)
19189 emit_move_insn (offset, tmp);
19190 emit_label (label);
19191 LABEL_NUSES (label) = 1;
19192 }
19193 if (max_size > 2)
19194 {
19195 rtx label = ix86_expand_aligntest (count, 2, true);
19196 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19197 src = change_address (srcmem, HImode, tmp);
19198 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19199 dest = change_address (destmem, HImode, tmp);
19200 emit_move_insn (dest, src);
19201 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
19202 true, OPTAB_LIB_WIDEN);
19203 if (tmp != offset)
19204 emit_move_insn (offset, tmp);
19205 emit_label (label);
19206 LABEL_NUSES (label) = 1;
19207 }
19208 if (max_size > 1)
19209 {
19210 rtx label = ix86_expand_aligntest (count, 1, true);
19211 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19212 src = change_address (srcmem, QImode, tmp);
19213 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19214 dest = change_address (destmem, QImode, tmp);
19215 emit_move_insn (dest, src);
19216 emit_label (label);
19217 LABEL_NUSES (label) = 1;
19218 }
19219 }
19220 }
19221
19222 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19223 static void
19224 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
19225 rtx count, int max_size)
19226 {
19227 count =
19228 expand_simple_binop (counter_mode (count), AND, count,
19229 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
19230 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
19231 gen_lowpart (QImode, value), count, QImode,
19232 1, max_size / 2);
19233 }
19234
19235 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19236 static void
19237 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
19238 {
19239 rtx dest;
19240
19241 if (CONST_INT_P (count))
19242 {
19243 HOST_WIDE_INT countval = INTVAL (count);
19244 int offset = 0;
19245
19246 if ((countval & 0x10) && max_size > 16)
19247 {
19248 if (TARGET_64BIT)
19249 {
19250 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19251 emit_insn (gen_strset (destptr, dest, value));
19252 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
19253 emit_insn (gen_strset (destptr, dest, value));
19254 }
19255 else
19256 gcc_unreachable ();
19257 offset += 16;
19258 }
19259 if ((countval & 0x08) && max_size > 8)
19260 {
19261 if (TARGET_64BIT)
19262 {
19263 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19264 emit_insn (gen_strset (destptr, dest, value));
19265 }
19266 else
19267 {
19268 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19269 emit_insn (gen_strset (destptr, dest, value));
19270 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
19271 emit_insn (gen_strset (destptr, dest, value));
19272 }
19273 offset += 8;
19274 }
19275 if ((countval & 0x04) && max_size > 4)
19276 {
19277 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19278 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19279 offset += 4;
19280 }
19281 if ((countval & 0x02) && max_size > 2)
19282 {
19283 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
19284 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19285 offset += 2;
19286 }
19287 if ((countval & 0x01) && max_size > 1)
19288 {
19289 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
19290 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19291 offset += 1;
19292 }
19293 return;
19294 }
19295 if (max_size > 32)
19296 {
19297 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
19298 return;
19299 }
19300 if (max_size > 16)
19301 {
19302 rtx label = ix86_expand_aligntest (count, 16, true);
19303 if (TARGET_64BIT)
19304 {
19305 dest = change_address (destmem, DImode, destptr);
19306 emit_insn (gen_strset (destptr, dest, value));
19307 emit_insn (gen_strset (destptr, dest, value));
19308 }
19309 else
19310 {
19311 dest = change_address (destmem, SImode, destptr);
19312 emit_insn (gen_strset (destptr, dest, value));
19313 emit_insn (gen_strset (destptr, dest, value));
19314 emit_insn (gen_strset (destptr, dest, value));
19315 emit_insn (gen_strset (destptr, dest, value));
19316 }
19317 emit_label (label);
19318 LABEL_NUSES (label) = 1;
19319 }
19320 if (max_size > 8)
19321 {
19322 rtx label = ix86_expand_aligntest (count, 8, true);
19323 if (TARGET_64BIT)
19324 {
19325 dest = change_address (destmem, DImode, destptr);
19326 emit_insn (gen_strset (destptr, dest, value));
19327 }
19328 else
19329 {
19330 dest = change_address (destmem, SImode, destptr);
19331 emit_insn (gen_strset (destptr, dest, value));
19332 emit_insn (gen_strset (destptr, dest, value));
19333 }
19334 emit_label (label);
19335 LABEL_NUSES (label) = 1;
19336 }
19337 if (max_size > 4)
19338 {
19339 rtx label = ix86_expand_aligntest (count, 4, true);
19340 dest = change_address (destmem, SImode, destptr);
19341 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19342 emit_label (label);
19343 LABEL_NUSES (label) = 1;
19344 }
19345 if (max_size > 2)
19346 {
19347 rtx label = ix86_expand_aligntest (count, 2, true);
19348 dest = change_address (destmem, HImode, destptr);
19349 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19350 emit_label (label);
19351 LABEL_NUSES (label) = 1;
19352 }
19353 if (max_size > 1)
19354 {
19355 rtx label = ix86_expand_aligntest (count, 1, true);
19356 dest = change_address (destmem, QImode, destptr);
19357 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19358 emit_label (label);
19359 LABEL_NUSES (label) = 1;
19360 }
19361 }
19362
19363 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
19364    to DESIRED_ALIGNMENT. */
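/* For example, raising the alignment from 1 to 8 emits three guarded
   moves: if bit 0 of DESTPTR is set copy one byte, if bit 1 is set copy
   two bytes, if bit 2 is set copy four bytes, decrementing COUNT
   accordingly after each move.  */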
19365 static void
19366 expand_movmem_prologue (rtx destmem, rtx srcmem,
19367 rtx destptr, rtx srcptr, rtx count,
19368 int align, int desired_alignment)
19369 {
19370 if (align <= 1 && desired_alignment > 1)
19371 {
19372 rtx label = ix86_expand_aligntest (destptr, 1, false);
19373 srcmem = change_address (srcmem, QImode, srcptr);
19374 destmem = change_address (destmem, QImode, destptr);
19375 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19376 ix86_adjust_counter (count, 1);
19377 emit_label (label);
19378 LABEL_NUSES (label) = 1;
19379 }
19380 if (align <= 2 && desired_alignment > 2)
19381 {
19382 rtx label = ix86_expand_aligntest (destptr, 2, false);
19383 srcmem = change_address (srcmem, HImode, srcptr);
19384 destmem = change_address (destmem, HImode, destptr);
19385 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19386 ix86_adjust_counter (count, 2);
19387 emit_label (label);
19388 LABEL_NUSES (label) = 1;
19389 }
19390 if (align <= 4 && desired_alignment > 4)
19391 {
19392 rtx label = ix86_expand_aligntest (destptr, 4, false);
19393 srcmem = change_address (srcmem, SImode, srcptr);
19394 destmem = change_address (destmem, SImode, destptr);
19395 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19396 ix86_adjust_counter (count, 4);
19397 emit_label (label);
19398 LABEL_NUSES (label) = 1;
19399 }
19400 gcc_assert (desired_alignment <= 8);
19401 }
19402
19403 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
19404    ALIGN_BYTES is how many bytes need to be copied. */
19405 static rtx
19406 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
19407 int desired_align, int align_bytes)
19408 {
19409 rtx src = *srcp;
19410 rtx src_size, dst_size;
19411 int off = 0;
19412 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
19413 if (src_align_bytes >= 0)
19414 src_align_bytes = desired_align - src_align_bytes;
19415 src_size = MEM_SIZE (src);
19416 dst_size = MEM_SIZE (dst);
19417 if (align_bytes & 1)
19418 {
19419 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19420 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
19421 off = 1;
19422 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19423 }
19424 if (align_bytes & 2)
19425 {
19426 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19427 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
19428 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19429 set_mem_align (dst, 2 * BITS_PER_UNIT);
19430 if (src_align_bytes >= 0
19431 && (src_align_bytes & 1) == (align_bytes & 1)
19432 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
19433 set_mem_align (src, 2 * BITS_PER_UNIT);
19434 off = 2;
19435 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19436 }
19437 if (align_bytes & 4)
19438 {
19439 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19440 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
19441 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19442 set_mem_align (dst, 4 * BITS_PER_UNIT);
19443 if (src_align_bytes >= 0)
19444 {
19445 unsigned int src_align = 0;
19446 if ((src_align_bytes & 3) == (align_bytes & 3))
19447 src_align = 4;
19448 else if ((src_align_bytes & 1) == (align_bytes & 1))
19449 src_align = 2;
19450 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
19451 set_mem_align (src, src_align * BITS_PER_UNIT);
19452 }
19453 off = 4;
19454 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19455 }
19456 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19457 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
19458 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19459 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19460 if (src_align_bytes >= 0)
19461 {
19462 unsigned int src_align = 0;
19463 if ((src_align_bytes & 7) == (align_bytes & 7))
19464 src_align = 8;
19465 else if ((src_align_bytes & 3) == (align_bytes & 3))
19466 src_align = 4;
19467 else if ((src_align_bytes & 1) == (align_bytes & 1))
19468 src_align = 2;
19469 if (src_align > (unsigned int) desired_align)
19470 src_align = desired_align;
19471 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
19472 set_mem_align (src, src_align * BITS_PER_UNIT);
19473 }
19474 if (dst_size)
19475 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19476 if (src_size)
19477 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
19478 *srcp = src;
19479 return dst;
19480 }
19481
19482 /* Store enough into DEST to align DEST, known to be aligned by ALIGN,
19483    to DESIRED_ALIGNMENT. */
19484 static void
19485 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
19486 int align, int desired_alignment)
19487 {
19488 if (align <= 1 && desired_alignment > 1)
19489 {
19490 rtx label = ix86_expand_aligntest (destptr, 1, false);
19491 destmem = change_address (destmem, QImode, destptr);
19492 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
19493 ix86_adjust_counter (count, 1);
19494 emit_label (label);
19495 LABEL_NUSES (label) = 1;
19496 }
19497 if (align <= 2 && desired_alignment > 2)
19498 {
19499 rtx label = ix86_expand_aligntest (destptr, 2, false);
19500 destmem = change_address (destmem, HImode, destptr);
19501 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
19502 ix86_adjust_counter (count, 2);
19503 emit_label (label);
19504 LABEL_NUSES (label) = 1;
19505 }
19506 if (align <= 4 && desired_alignment > 4)
19507 {
19508 rtx label = ix86_expand_aligntest (destptr, 4, false);
19509 destmem = change_address (destmem, SImode, destptr);
19510 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
19511 ix86_adjust_counter (count, 4);
19512 emit_label (label);
19513 LABEL_NUSES (label) = 1;
19514 }
19515 gcc_assert (desired_alignment <= 8);
19516 }
19517
19518 /* Store enough into DST to align DST to DESIRED_ALIGN.  ALIGN_BYTES
19519    is how many bytes need to be stored. */
19520 static rtx
19521 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
19522 int desired_align, int align_bytes)
19523 {
19524 int off = 0;
19525 rtx dst_size = MEM_SIZE (dst);
19526 if (align_bytes & 1)
19527 {
19528 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19529 off = 1;
19530 emit_insn (gen_strset (destreg, dst,
19531 gen_lowpart (QImode, value)));
19532 }
19533 if (align_bytes & 2)
19534 {
19535 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19536 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19537 set_mem_align (dst, 2 * BITS_PER_UNIT);
19538 off = 2;
19539 emit_insn (gen_strset (destreg, dst,
19540 gen_lowpart (HImode, value)));
19541 }
19542 if (align_bytes & 4)
19543 {
19544 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19545 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19546 set_mem_align (dst, 4 * BITS_PER_UNIT);
19547 off = 4;
19548 emit_insn (gen_strset (destreg, dst,
19549 gen_lowpart (SImode, value)));
19550 }
19551 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19552 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19553 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19554 if (dst_size)
19555 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19556 return dst;
19557 }
19558
19559 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
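/* A couple of illustrative cases: when optimizing for size the choice
   degenerates to the 1-byte or 4-byte rep-prefix variant (or to the
   corresponding loop when the user has fixed the rep-prefix registers),
   and an expected size below 4 bytes always picks the byte-at-a-time
   loop, since setting up REP is not worthwhile for such tiny blocks.
   An explicit -mstringop-strategy= setting, when usable, takes
   precedence over all of this.  */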
19560 static enum stringop_alg
19561 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
19562 int *dynamic_check)
19563 {
19564 const struct stringop_algs * algs;
19565 bool optimize_for_speed;
19566 /* Algorithms using the rep prefix want at least edi and ecx;
19567 additionally, memset wants eax and memcpy wants esi. Don't
19568 consider such algorithms if the user has appropriated those
19569 registers for their own purposes. */
19570 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
19571 || (memset
19572 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
19573
19574 #define ALG_USABLE_P(alg) (rep_prefix_usable \
19575 || (alg != rep_prefix_1_byte \
19576 && alg != rep_prefix_4_byte \
19577 && alg != rep_prefix_8_byte))
19578 const struct processor_costs *cost;
19579
19580 /* Even if the string operation call is cold, we still might spend a lot
19581 of time processing large blocks. */
19582 if (optimize_function_for_size_p (cfun)
19583 || (optimize_insn_for_size_p ()
19584 && expected_size != -1 && expected_size < 256))
19585 optimize_for_speed = false;
19586 else
19587 optimize_for_speed = true;
19588
19589 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
19590
19591 *dynamic_check = -1;
19592 if (memset)
19593 algs = &cost->memset[TARGET_64BIT != 0];
19594 else
19595 algs = &cost->memcpy[TARGET_64BIT != 0];
19596 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
19597 return stringop_alg;
19598 /* rep; movq or rep; movl is the smallest variant. */
19599 else if (!optimize_for_speed)
19600 {
19601 if (!count || (count & 3))
19602 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
19603 else
19604 return rep_prefix_usable ? rep_prefix_4_byte : loop;
19605 }
19606 /* Very tiny blocks are best handled via the loop; REP is expensive
19607    to set up. */
19608 else if (expected_size != -1 && expected_size < 4)
19609 return loop_1_byte;
19610 else if (expected_size != -1)
19611 {
19612 unsigned int i;
19613 enum stringop_alg alg = libcall;
19614 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19615 {
19616 /* We get here if the algorithms that were not libcall-based
19617 were rep-prefix based and we are unable to use rep prefixes
19618 based on global register usage. Break out of the loop and
19619 use the heuristic below. */
19620 if (algs->size[i].max == 0)
19621 break;
19622 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
19623 {
19624 enum stringop_alg candidate = algs->size[i].alg;
19625
19626 if (candidate != libcall && ALG_USABLE_P (candidate))
19627 alg = candidate;
19628 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
19629 last non-libcall inline algorithm. */
19630 if (TARGET_INLINE_ALL_STRINGOPS)
19631 {
19632 /* When the current size is best to be copied by a libcall,
19633 but we are still forced to inline, run the heuristic below
19634 that will pick code for medium sized blocks. */
19635 if (alg != libcall)
19636 return alg;
19637 break;
19638 }
19639 else if (ALG_USABLE_P (candidate))
19640 return candidate;
19641 }
19642 }
19643 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
19644 }
19645 /* When asked to inline the call anyway, try to pick a meaningful choice.
19646    We look for the maximal size of block that is faster to copy by hand and
19647    take blocks of at most that size, guessing that the average size will
19648    be roughly half of the block.
19649
19650    If this turns out to be bad, we might simply specify the preferred
19651    choice in ix86_costs. */
19652 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19653 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
19654 {
19655 int max = -1;
19656 enum stringop_alg alg;
19657 int i;
19658 bool any_alg_usable_p = true;
19659
19660 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19661 {
19662 enum stringop_alg candidate = algs->size[i].alg;
19663 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
19664
19665 if (candidate != libcall && candidate
19666 && ALG_USABLE_P (candidate))
19667 max = algs->size[i].max;
19668 }
19669 /* If there aren't any usable algorithms, then recursing on
19670 smaller sizes isn't going to find anything. Just return the
19671 simple byte-at-a-time copy loop. */
19672 if (!any_alg_usable_p)
19673 {
19674 /* Pick something reasonable. */
19675 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19676 *dynamic_check = 128;
19677 return loop_1_byte;
19678 }
19679 if (max == -1)
19680 max = 4096;
19681 alg = decide_alg (count, max / 2, memset, dynamic_check);
19682 gcc_assert (*dynamic_check == -1);
19683 gcc_assert (alg != libcall);
19684 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19685 *dynamic_check = max;
19686 return alg;
19687 }
19688 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
19689 #undef ALG_USABLE_P
19690 }
19691
19692 /* Decide on alignment. We know that the operand is already aligned to ALIGN
19693 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
19694 static int
19695 decide_alignment (int align,
19696 enum stringop_alg alg,
19697 int expected_size)
19698 {
19699 int desired_align = 0;
19700 switch (alg)
19701 {
19702 case no_stringop:
19703 gcc_unreachable ();
19704 case loop:
19705 case unrolled_loop:
19706 desired_align = GET_MODE_SIZE (Pmode);
19707 break;
19708 case rep_prefix_8_byte:
19709 desired_align = 8;
19710 break;
19711 case rep_prefix_4_byte:
19712 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
19713    copying a whole cacheline at once. */
19714 if (TARGET_PENTIUMPRO)
19715 desired_align = 8;
19716 else
19717 desired_align = 4;
19718 break;
19719 case rep_prefix_1_byte:
19720 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
19721    copying a whole cacheline at once. */
19722 if (TARGET_PENTIUMPRO)
19723 desired_align = 8;
19724 else
19725 desired_align = 1;
19726 break;
19727 case loop_1_byte:
19728 desired_align = 1;
19729 break;
19730 case libcall:
19731 return 0;
19732 }
19733
19734 if (optimize_size)
19735 desired_align = 1;
19736 if (desired_align < align)
19737 desired_align = align;
19738 if (expected_size != -1 && expected_size < 4)
19739 desired_align = align;
19740 return desired_align;
19741 }
19742
19743 /* Return the smallest power of 2 greater than VAL. */
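/* For example, smallest_pow2_greater_than (7) returns 8, while
   smallest_pow2_greater_than (8) returns 16 - the result is strictly
   greater than VAL.  */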
19744 static int
19745 smallest_pow2_greater_than (int val)
19746 {
19747 int ret = 1;
19748 while (ret <= val)
19749 ret <<= 1;
19750 return ret;
19751 }
19752
19753 /* Expand string move (memcpy) operation. Use i386 string operations when
19754 profitable. expand_setmem contains similar code. The code depends upon
19755 architecture, block size and alignment, but always has the same
19756 overall structure:
19757
19758 1) Prologue guard: Conditional that jumps up to epilogues for small
19759 blocks that can be handled by epilogue alone. This is faster but
19760 also needed for correctness, since the prologue assumes the block is larger
19761 than the desired alignment.
19762
19763 Optional dynamic check for size and libcall for large
19764 blocks is emitted here too, with -minline-stringops-dynamically.
19765
19766 2) Prologue: copy first few bytes in order to get destination aligned
19767 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
19768 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
19769 We emit either a jump tree on power of two sized blocks, or a byte loop.
19770
19771 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
19772 with specified algorithm.
19773
19774 4) Epilogue: code copying tail of the block that is too small to be
19775 handled by main body (or up to size guarded by prologue guard). */
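/* As a worked example (the numbers follow from decide_alignment and the
   switch below): for alg == unrolled_loop on a 32-bit target,
   size_needed is 2 * GET_MODE_SIZE (Pmode) = 8 and desired_align is 4.
   With a runtime count and align == 1, epilogue_size_needed becomes
   MAX (8 - 1, 4 - 1) rounded up to the next power of two, i.e. 8, so
   the prologue guard branches straight to the epilogue for blocks of
   fewer than 8 bytes.  */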
19776
19777 bool
19778 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
19779 rtx expected_align_exp, rtx expected_size_exp)
19780 {
19781 rtx destreg;
19782 rtx srcreg;
19783 rtx label = NULL;
19784 rtx tmp;
19785 rtx jump_around_label = NULL;
19786 HOST_WIDE_INT align = 1;
19787 unsigned HOST_WIDE_INT count = 0;
19788 HOST_WIDE_INT expected_size = -1;
19789 int size_needed = 0, epilogue_size_needed;
19790 int desired_align = 0, align_bytes = 0;
19791 enum stringop_alg alg;
19792 int dynamic_check;
19793 bool need_zero_guard = false;
19794
19795 if (CONST_INT_P (align_exp))
19796 align = INTVAL (align_exp);
19797 /* i386 can do misaligned access at a reasonably increased cost. */
19798 if (CONST_INT_P (expected_align_exp)
19799 && INTVAL (expected_align_exp) > align)
19800 align = INTVAL (expected_align_exp);
19801 /* ALIGN is the minimum of destination and source alignment, but we care here
19802 just about destination alignment. */
19803 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
19804 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
19805
19806 if (CONST_INT_P (count_exp))
19807 count = expected_size = INTVAL (count_exp);
19808 if (CONST_INT_P (expected_size_exp) && count == 0)
19809 expected_size = INTVAL (expected_size_exp);
19810
19811 /* Make sure we don't need to care about overflow later on. */
19812 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19813 return false;
19814
19815 /* Step 0: Decide on preferred algorithm, desired alignment and
19816 size of chunks to be copied by main loop. */
19817
19818 alg = decide_alg (count, expected_size, false, &dynamic_check);
19819 desired_align = decide_alignment (align, alg, expected_size);
19820
19821 if (!TARGET_ALIGN_STRINGOPS)
19822 align = desired_align;
19823
19824 if (alg == libcall)
19825 return false;
19826 gcc_assert (alg != no_stringop);
19827 if (!count)
19828 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
19829 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19830 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
19831 switch (alg)
19832 {
19833 case libcall:
19834 case no_stringop:
19835 gcc_unreachable ();
19836 case loop:
19837 need_zero_guard = true;
19838 size_needed = GET_MODE_SIZE (Pmode);
19839 break;
19840 case unrolled_loop:
19841 need_zero_guard = true;
19842 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
19843 break;
19844 case rep_prefix_8_byte:
19845 size_needed = 8;
19846 break;
19847 case rep_prefix_4_byte:
19848 size_needed = 4;
19849 break;
19850 case rep_prefix_1_byte:
19851 size_needed = 1;
19852 break;
19853 case loop_1_byte:
19854 need_zero_guard = true;
19855 size_needed = 1;
19856 break;
19857 }
19858
19859 epilogue_size_needed = size_needed;
19860
19861 /* Step 1: Prologue guard. */
19862
19863 /* Alignment code needs count to be in register. */
19864 if (CONST_INT_P (count_exp) && desired_align > align)
19865 {
19866 if (INTVAL (count_exp) > desired_align
19867 && INTVAL (count_exp) > size_needed)
19868 {
19869 align_bytes
19870 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19871 if (align_bytes <= 0)
19872 align_bytes = 0;
19873 else
19874 align_bytes = desired_align - align_bytes;
19875 }
19876 if (align_bytes == 0)
19877 count_exp = force_reg (counter_mode (count_exp), count_exp);
19878 }
19879 gcc_assert (desired_align >= 1 && align >= 1);
19880
19881 /* Ensure that alignment prologue won't copy past end of block. */
19882 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19883 {
19884 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19885 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19886    Make sure EPILOGUE_SIZE_NEEDED is a power of 2. */
19887 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19888
19889 if (count)
19890 {
19891 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19892 {
19893 /* If main algorithm works on QImode, no epilogue is needed.
19894 For small sizes just don't align anything. */
19895 if (size_needed == 1)
19896 desired_align = align;
19897 else
19898 goto epilogue;
19899 }
19900 }
19901 else
19902 {
19903 label = gen_label_rtx ();
19904 emit_cmp_and_jump_insns (count_exp,
19905 GEN_INT (epilogue_size_needed),
19906 LTU, 0, counter_mode (count_exp), 1, label);
19907 if (expected_size == -1 || expected_size < epilogue_size_needed)
19908 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19909 else
19910 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19911 }
19912 }
19913
19914 /* Emit code to decide at runtime whether a library call or inline code
19915    should be used. */
19916 if (dynamic_check != -1)
19917 {
19918 if (CONST_INT_P (count_exp))
19919 {
19920 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
19921 {
19922 emit_block_move_via_libcall (dst, src, count_exp, false);
19923 count_exp = const0_rtx;
19924 goto epilogue;
19925 }
19926 }
19927 else
19928 {
19929 rtx hot_label = gen_label_rtx ();
19930 jump_around_label = gen_label_rtx ();
19931 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19932 LEU, 0, GET_MODE (count_exp), 1, hot_label);
19933 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19934 emit_block_move_via_libcall (dst, src, count_exp, false);
19935 emit_jump (jump_around_label);
19936 emit_label (hot_label);
19937 }
19938 }
19939
19940 /* Step 2: Alignment prologue. */
19941
19942 if (desired_align > align)
19943 {
19944 if (align_bytes == 0)
19945 {
19946 /* Except for the first move in the epilogue, we no longer know
19947    the constant offset in the aliasing info.  It doesn't seem worth
19948    the pain to maintain it for the first move, so throw away
19949    the info early. */
19950 src = change_address (src, BLKmode, srcreg);
19951 dst = change_address (dst, BLKmode, destreg);
19952 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
19953 desired_align);
19954 }
19955 else
19956 {
19957 /* If we know how many bytes need to be stored before dst is
19958 sufficiently aligned, maintain aliasing info accurately. */
19959 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
19960 desired_align, align_bytes);
19961 count_exp = plus_constant (count_exp, -align_bytes);
19962 count -= align_bytes;
19963 }
19964 if (need_zero_guard
19965 && (count < (unsigned HOST_WIDE_INT) size_needed
19966 || (align_bytes == 0
19967 && count < ((unsigned HOST_WIDE_INT) size_needed
19968 + desired_align - align))))
19969 {
19970 /* It is possible that we copied enough so the main loop will not
19971 execute. */
19972 gcc_assert (size_needed > 1);
19973 if (label == NULL_RTX)
19974 label = gen_label_rtx ();
19975 emit_cmp_and_jump_insns (count_exp,
19976 GEN_INT (size_needed),
19977 LTU, 0, counter_mode (count_exp), 1, label);
19978 if (expected_size == -1
19979 || expected_size < (desired_align - align) / 2 + size_needed)
19980 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19981 else
19982 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19983 }
19984 }
19985 if (label && size_needed == 1)
19986 {
19987 emit_label (label);
19988 LABEL_NUSES (label) = 1;
19989 label = NULL;
19990 epilogue_size_needed = 1;
19991 }
19992 else if (label == NULL_RTX)
19993 epilogue_size_needed = size_needed;
19994
19995 /* Step 3: Main loop. */
19996
19997 switch (alg)
19998 {
19999 case libcall:
20000 case no_stringop:
20001 gcc_unreachable ();
20002 case loop_1_byte:
20003 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20004 count_exp, QImode, 1, expected_size);
20005 break;
20006 case loop:
20007 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20008 count_exp, Pmode, 1, expected_size);
20009 break;
20010 case unrolled_loop:
20011 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
20012 registers for 4 temporaries anyway. */
20013 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20014 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20015 expected_size);
20016 break;
20017 case rep_prefix_8_byte:
20018 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20019 DImode);
20020 break;
20021 case rep_prefix_4_byte:
20022 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20023 SImode);
20024 break;
20025 case rep_prefix_1_byte:
20026 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20027 QImode);
20028 break;
20029 }
20030 /* Properly adjust the offsets of the source and destination memory for aliasing. */
20031 if (CONST_INT_P (count_exp))
20032 {
20033 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20034 (count / size_needed) * size_needed);
20035 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20036 (count / size_needed) * size_needed);
20037 }
20038 else
20039 {
20040 src = change_address (src, BLKmode, srcreg);
20041 dst = change_address (dst, BLKmode, destreg);
20042 }
20043
20044 /* Step 4: Epilogue to copy the remaining bytes. */
20045 epilogue:
20046 if (label)
20047 {
20048 /* When the main loop is done, COUNT_EXP might hold the original count,
20049 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20050 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20051 bytes. Compensate if needed. */
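/* For instance, if the main loop copies 8-byte chunks (SIZE_NEEDED == 8)
   but EPILOGUE_SIZE_NEEDED was rounded up to 16, COUNT_EXP is masked
   with 7 below so the epilogue does not redo bytes the loop already
   copied.  */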
20052
20053 if (size_needed < epilogue_size_needed)
20054 {
20055 tmp =
20056 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20057 GEN_INT (size_needed - 1), count_exp, 1,
20058 OPTAB_DIRECT);
20059 if (tmp != count_exp)
20060 emit_move_insn (count_exp, tmp);
20061 }
20062 emit_label (label);
20063 LABEL_NUSES (label) = 1;
20064 }
20065
20066 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20067 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20068 epilogue_size_needed);
20069 if (jump_around_label)
20070 emit_label (jump_around_label);
20071 return true;
20072 }
20073
20074 /* Helper function for memset. For the QImode value 0xXY produce
20075 0xXYXYXYXY of the width specified by MODE. This is essentially
20076 a multiplication by 0x01010101, but we can do slightly better than
20077 synth_mult by unwinding the sequence by hand on CPUs with
20078 slow multiply. */
20079 static rtx
20080 promote_duplicated_reg (enum machine_mode mode, rtx val)
20081 {
20082 enum machine_mode valmode = GET_MODE (val);
20083 rtx tmp;
20084 int nops = mode == DImode ? 3 : 2;
20085
20086 gcc_assert (mode == SImode || mode == DImode);
20087 if (val == const0_rtx)
20088 return copy_to_mode_reg (mode, const0_rtx);
20089 if (CONST_INT_P (val))
20090 {
20091 HOST_WIDE_INT v = INTVAL (val) & 255;
20092
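/* For example, VAL == 0xab yields 0xabab, then 0xabababab, and for
   DImode the extra shift widens it to 0xabababababababab.  */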
20093 v |= v << 8;
20094 v |= v << 16;
20095 if (mode == DImode)
20096 v |= (v << 16) << 16;
20097 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20098 }
20099
20100 if (valmode == VOIDmode)
20101 valmode = QImode;
20102 if (valmode != QImode)
20103 val = gen_lowpart (QImode, val);
20104 if (mode == QImode)
20105 return val;
20106 if (!TARGET_PARTIAL_REG_STALL)
20107 nops--;
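/* Compare the cost of synthesizing the value with an integer multiply
   (mult_init plus mult_bit for each set bit of the 0x...01010101
   multiplier) against the cost of the shift/or (or insv) sequence
   emitted below, and use whichever is cheaper.  */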
20108 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20109 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20110 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20111 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20112 {
20113 rtx reg = convert_modes (mode, QImode, val, true);
20114 tmp = promote_duplicated_reg (mode, const1_rtx);
20115 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20116 OPTAB_DIRECT);
20117 }
20118 else
20119 {
20120 rtx reg = convert_modes (mode, QImode, val, true);
20121
20122 if (!TARGET_PARTIAL_REG_STALL)
20123 if (mode == SImode)
20124 emit_insn (gen_movsi_insv_1 (reg, reg));
20125 else
20126 emit_insn (gen_movdi_insv_1 (reg, reg));
20127 else
20128 {
20129 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20130 NULL, 1, OPTAB_DIRECT);
20131 reg =
20132 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20133 }
20134 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20135 NULL, 1, OPTAB_DIRECT);
20136 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20137 if (mode == SImode)
20138 return reg;
20139 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20140 NULL, 1, OPTAB_DIRECT);
20141 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20142 return reg;
20143 }
20144 }
20145
20146 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size
20147 that will be needed by the main loop storing SIZE_NEEDED chunks and by the
20148 prologue raising the alignment from ALIGN to DESIRED_ALIGN. */
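/* For example, on a 64-bit target with SIZE_NEEDED == 8 the value is
   widened to DImode, with SIZE_NEEDED == 4 (and no extra alignment work)
   to SImode, and a byte-at-a-time set leaves VAL unpromoted.  */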
20149 static rtx
20150 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20151 {
20152 rtx promoted_val;
20153
20154 if (TARGET_64BIT
20155 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20156 promoted_val = promote_duplicated_reg (DImode, val);
20157 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20158 promoted_val = promote_duplicated_reg (SImode, val);
20159 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20160 promoted_val = promote_duplicated_reg (HImode, val);
20161 else
20162 promoted_val = val;
20163
20164 return promoted_val;
20165 }
20166
20167 /* Expand the string set operation (memset/bzero). Use i386 string operations
20168 when profitable. See the expand_movmem comment for an explanation of the
20169 individual steps performed. */
20170 bool
20171 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20172 rtx expected_align_exp, rtx expected_size_exp)
20173 {
20174 rtx destreg;
20175 rtx label = NULL;
20176 rtx tmp;
20177 rtx jump_around_label = NULL;
20178 HOST_WIDE_INT align = 1;
20179 unsigned HOST_WIDE_INT count = 0;
20180 HOST_WIDE_INT expected_size = -1;
20181 int size_needed = 0, epilogue_size_needed;
20182 int desired_align = 0, align_bytes = 0;
20183 enum stringop_alg alg;
20184 rtx promoted_val = NULL;
20185 bool force_loopy_epilogue = false;
20186 int dynamic_check;
20187 bool need_zero_guard = false;
20188
20189 if (CONST_INT_P (align_exp))
20190 align = INTVAL (align_exp);
20191 /* i386 can do misaligned access at a reasonably increased cost. */
20192 if (CONST_INT_P (expected_align_exp)
20193 && INTVAL (expected_align_exp) > align)
20194 align = INTVAL (expected_align_exp);
20195 if (CONST_INT_P (count_exp))
20196 count = expected_size = INTVAL (count_exp);
20197 if (CONST_INT_P (expected_size_exp) && count == 0)
20198 expected_size = INTVAL (expected_size_exp);
20199
20200 /* Make sure we don't need to care about overflow later on. */
20201 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20202 return false;
20203
20204 /* Step 0: Decide on preferred algorithm, desired alignment and
20205 size of chunks to be copied by main loop. */
20206
20207 alg = decide_alg (count, expected_size, true, &dynamic_check);
20208 desired_align = decide_alignment (align, alg, expected_size);
20209
20210 if (!TARGET_ALIGN_STRINGOPS)
20211 align = desired_align;
20212
20213 if (alg == libcall)
20214 return false;
20215 gcc_assert (alg != no_stringop);
20216 if (!count)
20217 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
20218 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20219 switch (alg)
20220 {
20221 case libcall:
20222 case no_stringop:
20223 gcc_unreachable ();
20224 case loop:
20225 need_zero_guard = true;
20226 size_needed = GET_MODE_SIZE (Pmode);
20227 break;
20228 case unrolled_loop:
20229 need_zero_guard = true;
20230 size_needed = GET_MODE_SIZE (Pmode) * 4;
20231 break;
20232 case rep_prefix_8_byte:
20233 size_needed = 8;
20234 break;
20235 case rep_prefix_4_byte:
20236 size_needed = 4;
20237 break;
20238 case rep_prefix_1_byte:
20239 size_needed = 1;
20240 break;
20241 case loop_1_byte:
20242 need_zero_guard = true;
20243 size_needed = 1;
20244 break;
20245 }
20246 epilogue_size_needed = size_needed;
20247
20248 /* Step 1: Prologue guard. */
20249
20250 /* Alignment code needs count to be in register. */
20251 if (CONST_INT_P (count_exp) && desired_align > align)
20252 {
20253 if (INTVAL (count_exp) > desired_align
20254 && INTVAL (count_exp) > size_needed)
20255 {
20256 align_bytes
20257 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20258 if (align_bytes <= 0)
20259 align_bytes = 0;
20260 else
20261 align_bytes = desired_align - align_bytes;
20262 }
20263 if (align_bytes == 0)
20264 {
20265 enum machine_mode mode = SImode;
20266 if (TARGET_64BIT && (count & ~0xffffffff))
20267 mode = DImode;
20268 count_exp = force_reg (mode, count_exp);
20269 }
20270 }
20271 /* Do the cheap promotion to allow better CSE across the
20272 main loop and epilogue (i.e. one load of the big constant in
20273 front of all code). */
20274 if (CONST_INT_P (val_exp))
20275 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20276 desired_align, align);
20277 /* Ensure that alignment prologue won't copy past end of block. */
20278 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20279 {
20280 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20281 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20282 Make sure it is a power of 2. */
20283 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20284
20285 /* To improve performance on small blocks, we jump around the code
20286 that promotes VAL. This means that if the promoted VAL is not a
20287 constant, we might not use it in the epilogue and have to fall back
20288 to the byte loop variant. */
20289 if (epilogue_size_needed > 2 && !promoted_val)
20290 force_loopy_epilogue = true;
20291 if (count)
20292 {
20293 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20294 {
20295 /* If main algorithm works on QImode, no epilogue is needed.
20296 For small sizes just don't align anything. */
20297 if (size_needed == 1)
20298 desired_align = align;
20299 else
20300 goto epilogue;
20301 }
20302 }
20303 else
20304 {
20305 label = gen_label_rtx ();
20306 emit_cmp_and_jump_insns (count_exp,
20307 GEN_INT (epilogue_size_needed),
20308 LTU, 0, counter_mode (count_exp), 1, label);
20309 if (expected_size == -1 || expected_size <= epilogue_size_needed)
20310 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20311 else
20312 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20313 }
20314 }
20315 if (dynamic_check != -1)
20316 {
20317 rtx hot_label = gen_label_rtx ();
20318 jump_around_label = gen_label_rtx ();
20319 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20320 LEU, 0, counter_mode (count_exp), 1, hot_label);
20321 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20322 set_storage_via_libcall (dst, count_exp, val_exp, false);
20323 emit_jump (jump_around_label);
20324 emit_label (hot_label);
20325 }
20326
20327 /* Step 2: Alignment prologue. */
20328
20329 /* Do the expensive promotion once we branched off the small blocks. */
20330 if (!promoted_val)
20331 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20332 desired_align, align);
20333 gcc_assert (desired_align >= 1 && align >= 1);
20334
20335 if (desired_align > align)
20336 {
20337 if (align_bytes == 0)
20338 {
20339 /* Except for the first move in the epilogue, we no longer know
20340 the constant offset in the aliasing info. It doesn't seem worth
20341 the pain to maintain it for the first move, so throw away
20342 the info early. */
20343 dst = change_address (dst, BLKmode, destreg);
20344 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
20345 desired_align);
20346 }
20347 else
20348 {
20349 /* If we know how many bytes need to be stored before dst is
20350 sufficiently aligned, maintain aliasing info accurately. */
20351 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
20352 desired_align, align_bytes);
20353 count_exp = plus_constant (count_exp, -align_bytes);
20354 count -= align_bytes;
20355 }
20356 if (need_zero_guard
20357 && (count < (unsigned HOST_WIDE_INT) size_needed
20358 || (align_bytes == 0
20359 && count < ((unsigned HOST_WIDE_INT) size_needed
20360 + desired_align - align))))
20361 {
20362 /* It is possible that we copied enough so the main loop will not
20363 execute. */
20364 gcc_assert (size_needed > 1);
20365 if (label == NULL_RTX)
20366 label = gen_label_rtx ();
20367 emit_cmp_and_jump_insns (count_exp,
20368 GEN_INT (size_needed),
20369 LTU, 0, counter_mode (count_exp), 1, label);
20370 if (expected_size == -1
20371 || expected_size < (desired_align - align) / 2 + size_needed)
20372 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20373 else
20374 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20375 }
20376 }
20377 if (label && size_needed == 1)
20378 {
20379 emit_label (label);
20380 LABEL_NUSES (label) = 1;
20381 label = NULL;
20382 promoted_val = val_exp;
20383 epilogue_size_needed = 1;
20384 }
20385 else if (label == NULL_RTX)
20386 epilogue_size_needed = size_needed;
20387
20388 /* Step 3: Main loop. */
20389
20390 switch (alg)
20391 {
20392 case libcall:
20393 case no_stringop:
20394 gcc_unreachable ();
20395 case loop_1_byte:
20396 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20397 count_exp, QImode, 1, expected_size);
20398 break;
20399 case loop:
20400 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20401 count_exp, Pmode, 1, expected_size);
20402 break;
20403 case unrolled_loop:
20404 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20405 count_exp, Pmode, 4, expected_size);
20406 break;
20407 case rep_prefix_8_byte:
20408 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20409 DImode, val_exp);
20410 break;
20411 case rep_prefix_4_byte:
20412 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20413 SImode, val_exp);
20414 break;
20415 case rep_prefix_1_byte:
20416 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20417 QImode, val_exp);
20418 break;
20419 }
20420 /* Properly adjust the offset of the destination memory for aliasing. */
20421 if (CONST_INT_P (count_exp))
20422 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20423 (count / size_needed) * size_needed);
20424 else
20425 dst = change_address (dst, BLKmode, destreg);
20426
20427 /* Step 4: Epilogue to copy the remaining bytes. */
20428
20429 if (label)
20430 {
20431 /* When the main loop is done, COUNT_EXP might hold the original count,
20432 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20433 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20434 bytes. Compensate if needed. */
20435
20436 if (size_needed < epilogue_size_needed)
20437 {
20438 tmp =
20439 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20440 GEN_INT (size_needed - 1), count_exp, 1,
20441 OPTAB_DIRECT);
20442 if (tmp != count_exp)
20443 emit_move_insn (count_exp, tmp);
20444 }
20445 emit_label (label);
20446 LABEL_NUSES (label) = 1;
20447 }
20448 epilogue:
20449 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20450 {
20451 if (force_loopy_epilogue)
20452 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
20453 epilogue_size_needed);
20454 else
20455 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
20456 epilogue_size_needed);
20457 }
20458 if (jump_around_label)
20459 emit_label (jump_around_label);
20460 return true;
20461 }
20462
20463 /* Expand the appropriate insns for doing strlen if not just doing
20464 repnz; scasb
20465
20466 out = result, initialized with the start address
20467 align_rtx = alignment of the address.
20468 scratch = scratch register, initialized with the start address when
20469 not aligned, otherwise undefined
20470
20471 This is just the body. It needs the initializations mentioned above and
20472 some address computation at the end. These things are done in i386.md. */
20473
20474 static void
20475 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
20476 {
20477 int align;
20478 rtx tmp;
20479 rtx align_2_label = NULL_RTX;
20480 rtx align_3_label = NULL_RTX;
20481 rtx align_4_label = gen_label_rtx ();
20482 rtx end_0_label = gen_label_rtx ();
20483 rtx mem;
20484 rtx tmpreg = gen_reg_rtx (SImode);
20485 rtx scratch = gen_reg_rtx (SImode);
20486 rtx cmp;
20487
20488 align = 0;
20489 if (CONST_INT_P (align_rtx))
20490 align = INTVAL (align_rtx);
20491
20492 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
20493
20494 /* Is there a known alignment and is it less than 4? */
20495 if (align < 4)
20496 {
20497 rtx scratch1 = gen_reg_rtx (Pmode);
20498 emit_move_insn (scratch1, out);
20499 /* Is there a known alignment and is it not 2? */
20500 if (align != 2)
20501 {
20502 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
20503 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
20504
20505 /* Leave just the 3 lower bits. */
20506 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
20507 NULL_RTX, 0, OPTAB_WIDEN);
20508
20509 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20510 Pmode, 1, align_4_label);
20511 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
20512 Pmode, 1, align_2_label);
20513 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
20514 Pmode, 1, align_3_label);
20515 }
20516 else
20517 {
20518 /* Since the alignment is 2, we have to check 2 or 0 bytes;
20519 check whether it is aligned to a 4-byte boundary. */
20520
20521 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
20522 NULL_RTX, 0, OPTAB_WIDEN);
20523
20524 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20525 Pmode, 1, align_4_label);
20526 }
20527
20528 mem = change_address (src, QImode, out);
20529
20530 /* Now compare the bytes. */
20531
20532 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
20533 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
20534 QImode, 1, end_0_label);
20535
20536 /* Increment the address. */
20537 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20538
20539 /* Not needed with an alignment of 2 */
20540 if (align != 2)
20541 {
20542 emit_label (align_2_label);
20543
20544 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20545 end_0_label);
20546
20547 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20548
20549 emit_label (align_3_label);
20550 }
20551
20552 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20553 end_0_label);
20554
20555 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20556 }
20557
20558 /* Generate a loop to check 4 bytes at a time. It is not a good idea
20559 to align this loop: it only makes the program bigger and does not
20560 speed it up. */
20561 emit_label (align_4_label);
20562
20563 mem = change_address (src, SImode, out);
20564 emit_move_insn (scratch, mem);
20565 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
20566
20567 /* This formula yields a nonzero result iff one of the bytes is zero.
20568 This saves three branches inside the loop and many cycles. */
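/* That is, compute (X - 0x01010101) & ~X & 0x80808080.  Below the first
   zero byte every 0x80 marker bit is cleared, while the first zero byte
   keeps its marker bit set, so the least significant surviving marker
   identifies the first zero byte -- which is all the code below needs.  */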
20569
20570 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
20571 emit_insn (gen_one_cmplsi2 (scratch, scratch));
20572 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
20573 emit_insn (gen_andsi3 (tmpreg, tmpreg,
20574 gen_int_mode (0x80808080, SImode)));
20575 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
20576 align_4_label);
20577
20578 if (TARGET_CMOVE)
20579 {
20580 rtx reg = gen_reg_rtx (SImode);
20581 rtx reg2 = gen_reg_rtx (Pmode);
20582 emit_move_insn (reg, tmpreg);
20583 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
20584
20585 /* If zero is not in the first two bytes, move two bytes forward. */
20586 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20587 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20588 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20589 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
20590 gen_rtx_IF_THEN_ELSE (SImode, tmp,
20591 reg,
20592 tmpreg)));
20593 /* Emit lea manually to avoid clobbering of flags. */
20594 emit_insn (gen_rtx_SET (SImode, reg2,
20595 gen_rtx_PLUS (Pmode, out, const2_rtx)));
20596
20597 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20598 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20599 emit_insn (gen_rtx_SET (VOIDmode, out,
20600 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
20601 reg2,
20602 out)));
20603 }
20604 else
20605 {
20606 rtx end_2_label = gen_label_rtx ();
20607 /* Is zero in the first two bytes? */
20608
20609 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20610 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20611 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
20612 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20613 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
20614 pc_rtx);
20615 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20616 JUMP_LABEL (tmp) = end_2_label;
20617
20618 /* Not in the first two. Move two bytes forward. */
20619 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
20620 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
20621
20622 emit_label (end_2_label);
20623
20624 }
20625
20626 /* Avoid a branch while fixing up the byte position. */
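/* At this point OUT points 4 bytes (or 6, after the two-byte adjustment
   above) past the start of the word containing the zero, and bit 7 of the
   low byte of TMPREG is set iff the lower of the two remaining candidate
   bytes is the zero.  Adding TMPREG to itself moves that bit into the
   carry flag, so OUT - 3 - CF lands exactly on the zero byte.  */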
20627 tmpreg = gen_lowpart (QImode, tmpreg);
20628 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
20629 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
20630 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
20631 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
20632
20633 emit_label (end_0_label);
20634 }
20635
20636 /* Expand strlen. */
20637
20638 bool
20639 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
20640 {
20641 rtx addr, scratch1, scratch2, scratch3, scratch4;
20642
20643 /* The generic case of the strlen expander is long. Avoid its
20644 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
20645
20646 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20647 && !TARGET_INLINE_ALL_STRINGOPS
20648 && !optimize_insn_for_size_p ()
20649 && (!CONST_INT_P (align) || INTVAL (align) < 4))
20650 return false;
20651
20652 addr = force_reg (Pmode, XEXP (src, 0));
20653 scratch1 = gen_reg_rtx (Pmode);
20654
20655 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20656 && !optimize_insn_for_size_p ())
20657 {
20658 /* Well it seems that some optimizer does not combine a call like
20659 foo(strlen(bar), strlen(bar));
20660 when the move and the subtraction are done here. It does calculate
20661 the length just once when these instructions are done inside of
20662 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
20663 often used and I use one fewer register for the lifetime of
20664 output_strlen_unroll(), this is better. */
20665
20666 emit_move_insn (out, addr);
20667
20668 ix86_expand_strlensi_unroll_1 (out, src, align);
20669
20670 /* strlensi_unroll_1 returns the address of the zero at the end of
20671 the string, like memchr(), so compute the length by subtracting
20672 the start address. */
20673 emit_insn (ix86_gen_sub3 (out, out, addr));
20674 }
20675 else
20676 {
20677 rtx unspec;
20678
20679 /* Can't use this if the user has appropriated eax, ecx, or edi. */
20680 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
20681 return false;
20682
20683 scratch2 = gen_reg_rtx (Pmode);
20684 scratch3 = gen_reg_rtx (Pmode);
20685 scratch4 = force_reg (Pmode, constm1_rtx);
20686
20687 emit_move_insn (scratch3, addr);
20688 eoschar = force_reg (QImode, eoschar);
20689
20690 src = replace_equiv_address_nv (src, scratch3);
20691
20692 /* If .md starts supporting :P, this can be done in .md. */
20693 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
20694 scratch4), UNSPEC_SCAS);
20695 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
20696 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
20697 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
20698 }
20699 return true;
20700 }
20701
20702 /* For a given symbol (function), construct code to compute the address of its
20703 PLT entry in the large x86-64 PIC model. */
20704 rtx
20705 construct_plt_address (rtx symbol)
20706 {
20707 rtx tmp = gen_reg_rtx (Pmode);
20708 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
20709
20710 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
20711 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
20712
20713 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
20714 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
20715 return tmp;
20716 }
20717
20718 rtx
20719 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20720 rtx callarg2,
20721 rtx pop, int sibcall)
20722 {
20723 rtx use = NULL, call;
20724
20725 if (pop == const0_rtx)
20726 pop = NULL;
20727 gcc_assert (!TARGET_64BIT || !pop);
20728
20729 if (TARGET_MACHO && !TARGET_64BIT)
20730 {
20731 #if TARGET_MACHO
20732 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20733 fnaddr = machopic_indirect_call_target (fnaddr);
20734 #endif
20735 }
20736 else
20737 {
20738 /* Static functions and indirect calls don't need the pic register. */
20739 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20740 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20741 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20742 use_reg (&use, pic_offset_table_rtx);
20743 }
20744
20745 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20746 {
20747 rtx al = gen_rtx_REG (QImode, AX_REG);
20748 emit_move_insn (al, callarg2);
20749 use_reg (&use, al);
20750 }
20751
20752 if (ix86_cmodel == CM_LARGE_PIC
20753 && MEM_P (fnaddr)
20754 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20755 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20756 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20757 else if (sibcall
20758 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20759 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20760 {
20761 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20762 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20763 }
20764
20765 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20766 if (retval)
20767 call = gen_rtx_SET (VOIDmode, retval, call);
20768 if (pop)
20769 {
20770 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20771 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20772 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20773 }
20774 if (TARGET_64BIT
20775 && ix86_cfun_abi () == MS_ABI
20776 && (!callarg2 || INTVAL (callarg2) != -2))
20777 {
20778 /* We need to represent that the XMM6-XMM15, SI and DI registers,
20779 which are call-saved in the MS ABI, are clobbered by SYSV calls. */
20780 static int clobbered_registers[] = {
20781 XMM6_REG, XMM7_REG, XMM8_REG,
20782 XMM9_REG, XMM10_REG, XMM11_REG,
20783 XMM12_REG, XMM13_REG, XMM14_REG,
20784 XMM15_REG, SI_REG, DI_REG
20785 };
20786 unsigned int i;
20787 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20788 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20789 UNSPEC_MS_TO_SYSV_CALL);
20790
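/* Build a PARALLEL of the call, the UNSPEC_MS_TO_SYSV_CALL marker, and a
   CLOBBER for each register listed above (TImode for the SSE registers,
   DImode for SI and DI).  */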
20791 vec[0] = call;
20792 vec[1] = unspec;
20793 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20794 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20795 ? TImode : DImode,
20796 gen_rtx_REG
20797 (SSE_REGNO_P (clobbered_registers[i])
20798 ? TImode : DImode,
20799 clobbered_registers[i]));
20800
20801 call = gen_rtx_PARALLEL (VOIDmode,
20802 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20803 + 2, vec));
20804 }
20805
20806 call = emit_call_insn (call);
20807 if (use)
20808 CALL_INSN_FUNCTION_USAGE (call) = use;
20809
20810 return call;
20811 }
20812
20813 \f
20814 /* Clear stack slot assignments remembered from previous functions.
20815 This is called from INIT_EXPANDERS once before RTL is emitted for each
20816 function. */
20817
20818 static struct machine_function *
20819 ix86_init_machine_status (void)
20820 {
20821 struct machine_function *f;
20822
20823 f = ggc_alloc_cleared_machine_function ();
20824 f->use_fast_prologue_epilogue_nregs = -1;
20825 f->tls_descriptor_call_expanded_p = 0;
20826 f->call_abi = ix86_abi;
20827
20828 return f;
20829 }
20830
20831 /* Return a MEM corresponding to a stack slot with mode MODE.
20832 Allocate a new slot if necessary.
20833
20834 The RTL for a function can have several slots available: N is
20835 which slot to use. */
20836
20837 rtx
20838 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20839 {
20840 struct stack_local_entry *s;
20841
20842 gcc_assert (n < MAX_386_STACK_LOCALS);
20843
20844 /* Virtual slot is valid only before vregs are instantiated. */
20845 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20846
20847 for (s = ix86_stack_locals; s; s = s->next)
20848 if (s->mode == mode && s->n == n)
20849 return copy_rtx (s->rtl);
20850
20851 s = ggc_alloc_stack_local_entry ();
20852 s->n = n;
20853 s->mode = mode;
20854 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20855
20856 s->next = ix86_stack_locals;
20857 ix86_stack_locals = s;
20858 return s->rtl;
20859 }
20860
20861 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20862
20863 static GTY(()) rtx ix86_tls_symbol;
20864 rtx
20865 ix86_tls_get_addr (void)
20866 {
20867
20868 if (!ix86_tls_symbol)
20869 {
20870 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20871 (TARGET_ANY_GNU_TLS
20872 && !TARGET_64BIT)
20873 ? "___tls_get_addr"
20874 : "__tls_get_addr");
20875 }
20876
20877 return ix86_tls_symbol;
20878 }
20879
20880 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20881
20882 static GTY(()) rtx ix86_tls_module_base_symbol;
20883 rtx
20884 ix86_tls_module_base (void)
20885 {
20886
20887 if (!ix86_tls_module_base_symbol)
20888 {
20889 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20890 "_TLS_MODULE_BASE_");
20891 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20892 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20893 }
20894
20895 return ix86_tls_module_base_symbol;
20896 }
20897 \f
20898 /* Calculate the length of the memory address in the instruction
20899 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20900
20901 int
20902 memory_address_length (rtx addr)
20903 {
20904 struct ix86_address parts;
20905 rtx base, index, disp;
20906 int len;
20907 int ok;
20908
20909 if (GET_CODE (addr) == PRE_DEC
20910 || GET_CODE (addr) == POST_INC
20911 || GET_CODE (addr) == PRE_MODIFY
20912 || GET_CODE (addr) == POST_MODIFY)
20913 return 0;
20914
20915 ok = ix86_decompose_address (addr, &parts);
20916 gcc_assert (ok);
20917
20918 if (parts.base && GET_CODE (parts.base) == SUBREG)
20919 parts.base = SUBREG_REG (parts.base);
20920 if (parts.index && GET_CODE (parts.index) == SUBREG)
20921 parts.index = SUBREG_REG (parts.index);
20922
20923 base = parts.base;
20924 index = parts.index;
20925 disp = parts.disp;
20926 len = 0;
20927
20928 /* Rule of thumb:
20929 - esp as the base always wants an index,
20930 - ebp as the base always wants a displacement,
20931 - r12 as the base always wants an index,
20932 - r13 as the base always wants a displacement. */
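/* E.g. (%esp) can only be encoded with a SIB byte, because mod=00 r/m=100
   means "SIB follows", and (%ebp) must be encoded as 0x0(%ebp) with a
   one-byte displacement, because mod=00 r/m=101 means disp32 with no base.
   r12 and r13 hit the same r/m encodings in 64-bit code.  */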
20933
20934 /* Register Indirect. */
20935 if (base && !index && !disp)
20936 {
20937 /* esp (for its index) and ebp (for its displacement) need
20938 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
20939 code. */
20940 if (REG_P (addr)
20941 && (addr == arg_pointer_rtx
20942 || addr == frame_pointer_rtx
20943 || REGNO (addr) == SP_REG
20944 || REGNO (addr) == BP_REG
20945 || REGNO (addr) == R12_REG
20946 || REGNO (addr) == R13_REG))
20947 len = 1;
20948 }
20949
20950 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
20951 is not disp32, but disp32(%rip), so for a plain disp32 a
20952 SIB byte is needed, unless print_operand_address
20953 optimizes it into disp32(%rip) or (%rip) is implied
20954 by UNSPEC. */
20955 else if (disp && !base && !index)
20956 {
20957 len = 4;
20958 if (TARGET_64BIT)
20959 {
20960 rtx symbol = disp;
20961
20962 if (GET_CODE (disp) == CONST)
20963 symbol = XEXP (disp, 0);
20964 if (GET_CODE (symbol) == PLUS
20965 && CONST_INT_P (XEXP (symbol, 1)))
20966 symbol = XEXP (symbol, 0);
20967
20968 if (GET_CODE (symbol) != LABEL_REF
20969 && (GET_CODE (symbol) != SYMBOL_REF
20970 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
20971 && (GET_CODE (symbol) != UNSPEC
20972 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
20973 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
20974 len += 1;
20975 }
20976 }
20977
20978 else
20979 {
20980 /* Find the length of the displacement constant. */
20981 if (disp)
20982 {
20983 if (base && satisfies_constraint_K (disp))
20984 len = 1;
20985 else
20986 len = 4;
20987 }
20988 /* ebp always wants a displacement. Similarly r13. */
20989 else if (base && REG_P (base)
20990 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
20991 len = 1;
20992
20993 /* An index requires the two-byte modrm form.... */
20994 if (index
20995 /* ...like esp (or r12), which always wants an index. */
20996 || base == arg_pointer_rtx
20997 || base == frame_pointer_rtx
20998 || (base && REG_P (base)
20999 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
21000 len += 1;
21001 }
21002
21003 switch (parts.seg)
21004 {
21005 case SEG_FS:
21006 case SEG_GS:
21007 len += 1;
21008 break;
21009 default:
21010 break;
21011 }
21012
21013 return len;
21014 }
21015
21016 /* Compute default value for "length_immediate" attribute. When SHORTFORM
21017 is set, expect that insn have 8bit immediate alternative. */
21018 int
21019 ix86_attr_length_immediate_default (rtx insn, int shortform)
21020 {
21021 int len = 0;
21022 int i;
21023 extract_insn_cached (insn);
21024 for (i = recog_data.n_operands - 1; i >= 0; --i)
21025 if (CONSTANT_P (recog_data.operand[i]))
21026 {
21027 enum attr_mode mode = get_attr_mode (insn);
21028
21029 gcc_assert (!len);
21030 if (shortform && CONST_INT_P (recog_data.operand[i]))
21031 {
21032 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21033 switch (mode)
21034 {
21035 case MODE_QI:
21036 len = 1;
21037 continue;
21038 case MODE_HI:
21039 ival = trunc_int_for_mode (ival, HImode);
21040 break;
21041 case MODE_SI:
21042 ival = trunc_int_for_mode (ival, SImode);
21043 break;
21044 default:
21045 break;
21046 }
21047 if (IN_RANGE (ival, -128, 127))
21048 {
21049 len = 1;
21050 continue;
21051 }
21052 }
21053 switch (mode)
21054 {
21055 case MODE_QI:
21056 len = 1;
21057 break;
21058 case MODE_HI:
21059 len = 2;
21060 break;
21061 case MODE_SI:
21062 len = 4;
21063 break;
21064 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
21065 case MODE_DI:
21066 len = 4;
21067 break;
21068 default:
21069 fatal_insn ("unknown insn mode", insn);
21070 }
21071 }
21072 return len;
21073 }
21074 /* Compute default value for "length_address" attribute. */
21075 int
21076 ix86_attr_length_address_default (rtx insn)
21077 {
21078 int i;
21079
21080 if (get_attr_type (insn) == TYPE_LEA)
21081 {
21082 rtx set = PATTERN (insn), addr;
21083
21084 if (GET_CODE (set) == PARALLEL)
21085 set = XVECEXP (set, 0, 0);
21086
21087 gcc_assert (GET_CODE (set) == SET);
21088
21089 addr = SET_SRC (set);
21090 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21091 {
21092 if (GET_CODE (addr) == ZERO_EXTEND)
21093 addr = XEXP (addr, 0);
21094 if (GET_CODE (addr) == SUBREG)
21095 addr = SUBREG_REG (addr);
21096 }
21097
21098 return memory_address_length (addr);
21099 }
21100
21101 extract_insn_cached (insn);
21102 for (i = recog_data.n_operands - 1; i >= 0; --i)
21103 if (MEM_P (recog_data.operand[i]))
21104 {
21105 constrain_operands_cached (reload_completed);
21106 if (which_alternative != -1)
21107 {
21108 const char *constraints = recog_data.constraints[i];
21109 int alt = which_alternative;
21110
21111 while (*constraints == '=' || *constraints == '+')
21112 constraints++;
21113 while (alt-- > 0)
21114 while (*constraints++ != ',')
21115 ;
21116 /* Skip ignored operands. */
21117 if (*constraints == 'X')
21118 continue;
21119 }
21120 return memory_address_length (XEXP (recog_data.operand[i], 0));
21121 }
21122 return 0;
21123 }
21124
21125 /* Compute default value for "length_vex" attribute. It includes
21126 2 or 3 byte VEX prefix and 1 opcode byte. */
21127
21128 int
21129 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
21130 int has_vex_w)
21131 {
21132 int i;
21133
21134 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
21135 requires the 3-byte VEX prefix. */
21136 if (!has_0f_opcode || has_vex_w)
21137 return 3 + 1;
21138
21139 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
21140 if (!TARGET_64BIT)
21141 return 2 + 1;
21142
21143 extract_insn_cached (insn);
21144
21145 for (i = recog_data.n_operands - 1; i >= 0; --i)
21146 if (REG_P (recog_data.operand[i]))
21147 {
21148 /* REX.W bit uses 3 byte VEX prefix. */
21149 if (GET_MODE (recog_data.operand[i]) == DImode
21150 && GENERAL_REG_P (recog_data.operand[i]))
21151 return 3 + 1;
21152 }
21153 else
21154 {
21155 /* REX.X or REX.B bits use 3 byte VEX prefix. */
21156 if (MEM_P (recog_data.operand[i])
21157 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
21158 return 3 + 1;
21159 }
21160
21161 return 2 + 1;
21162 }
21163 \f
21164 /* Return the maximum number of instructions a cpu can issue. */
21165
21166 static int
21167 ix86_issue_rate (void)
21168 {
21169 switch (ix86_tune)
21170 {
21171 case PROCESSOR_PENTIUM:
21172 case PROCESSOR_ATOM:
21173 case PROCESSOR_K6:
21174 return 2;
21175
21176 case PROCESSOR_PENTIUMPRO:
21177 case PROCESSOR_PENTIUM4:
21178 case PROCESSOR_ATHLON:
21179 case PROCESSOR_K8:
21180 case PROCESSOR_AMDFAM10:
21181 case PROCESSOR_NOCONA:
21182 case PROCESSOR_GENERIC32:
21183 case PROCESSOR_GENERIC64:
21184 case PROCESSOR_BDVER1:
21185 return 3;
21186
21187 case PROCESSOR_CORE2:
21188 return 4;
21189
21190 default:
21191 return 1;
21192 }
21193 }
21194
21195 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
21196 by DEP_INSN and nothing else set by DEP_INSN. */
21197
21198 static int
21199 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
21200 {
21201 rtx set, set2;
21202
21203 /* Simplify the test for uninteresting insns. */
21204 if (insn_type != TYPE_SETCC
21205 && insn_type != TYPE_ICMOV
21206 && insn_type != TYPE_FCMOV
21207 && insn_type != TYPE_IBR)
21208 return 0;
21209
21210 if ((set = single_set (dep_insn)) != 0)
21211 {
21212 set = SET_DEST (set);
21213 set2 = NULL_RTX;
21214 }
21215 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
21216 && XVECLEN (PATTERN (dep_insn), 0) == 2
21217 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
21218 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
21219 {
21220 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
21221 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
21222 }
21223 else
21224 return 0;
21225
21226 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
21227 return 0;
21228
21229 /* This test is true if the dependent insn reads the flags but
21230 not any other potentially set register. */
21231 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
21232 return 0;
21233
21234 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
21235 return 0;
21236
21237 return 1;
21238 }
21239
21240 /* Return true iff USE_INSN has a memory address with operands set by
21241 SET_INSN. */
21242
21243 bool
21244 ix86_agi_dependent (rtx set_insn, rtx use_insn)
21245 {
21246 int i;
21247 extract_insn_cached (use_insn);
21248 for (i = recog_data.n_operands - 1; i >= 0; --i)
21249 if (MEM_P (recog_data.operand[i]))
21250 {
21251 rtx addr = XEXP (recog_data.operand[i], 0);
21252 return modified_in_p (addr, set_insn) != 0;
21253 }
21254 return false;
21255 }
21256
21257 static int
21258 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
21259 {
21260 enum attr_type insn_type, dep_insn_type;
21261 enum attr_memory memory;
21262 rtx set, set2;
21263 int dep_insn_code_number;
21264
21265 /* Anti and output dependencies have zero cost on all CPUs. */
21266 if (REG_NOTE_KIND (link) != 0)
21267 return 0;
21268
21269 dep_insn_code_number = recog_memoized (dep_insn);
21270
21271 /* If we can't recognize the insns, we can't really do anything. */
21272 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
21273 return cost;
21274
21275 insn_type = get_attr_type (insn);
21276 dep_insn_type = get_attr_type (dep_insn);
21277
21278 switch (ix86_tune)
21279 {
21280 case PROCESSOR_PENTIUM:
21281 /* Address Generation Interlock adds a cycle of latency. */
21282 if (insn_type == TYPE_LEA)
21283 {
21284 rtx addr = PATTERN (insn);
21285
21286 if (GET_CODE (addr) == PARALLEL)
21287 addr = XVECEXP (addr, 0, 0);
21288
21289 gcc_assert (GET_CODE (addr) == SET);
21290
21291 addr = SET_SRC (addr);
21292 if (modified_in_p (addr, dep_insn))
21293 cost += 1;
21294 }
21295 else if (ix86_agi_dependent (dep_insn, insn))
21296 cost += 1;
21297
21298 /* ??? Compares pair with jump/setcc. */
21299 if (ix86_flags_dependent (insn, dep_insn, insn_type))
21300 cost = 0;
21301
21302 /* Floating point stores require value to be ready one cycle earlier. */
21303 if (insn_type == TYPE_FMOV
21304 && get_attr_memory (insn) == MEMORY_STORE
21305 && !ix86_agi_dependent (dep_insn, insn))
21306 cost += 1;
21307 break;
21308
21309 case PROCESSOR_PENTIUMPRO:
21310 memory = get_attr_memory (insn);
21311
21312 /* INT->FP conversion is expensive. */
21313 if (get_attr_fp_int_src (dep_insn))
21314 cost += 5;
21315
21316 /* There is one cycle extra latency between an FP op and a store. */
21317 if (insn_type == TYPE_FMOV
21318 && (set = single_set (dep_insn)) != NULL_RTX
21319 && (set2 = single_set (insn)) != NULL_RTX
21320 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
21321 && MEM_P (SET_DEST (set2)))
21322 cost += 1;
21323
21324 /* Show the ability of the reorder buffer to hide the latency of a load by
21325 executing it in parallel with the previous instruction when the previous
21326 instruction is not needed to compute the address. */
21327 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21328 && !ix86_agi_dependent (dep_insn, insn))
21329 {
21330 /* Claim moves to take one cycle, as the core can issue one load
21331 at a time and the next load can start a cycle later. */
21332 if (dep_insn_type == TYPE_IMOV
21333 || dep_insn_type == TYPE_FMOV)
21334 cost = 1;
21335 else if (cost > 1)
21336 cost--;
21337 }
21338 break;
21339
21340 case PROCESSOR_K6:
21341 memory = get_attr_memory (insn);
21342
21343 /* The esp dependency is resolved before the instruction is really
21344 finished. */
21345 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
21346 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
21347 return 1;
21348
21349 /* INT->FP conversion is expensive. */
21350 if (get_attr_fp_int_src (dep_insn))
21351 cost += 5;
21352
21353 /* Show the ability of the reorder buffer to hide the latency of a load by
21354 executing it in parallel with the previous instruction when the previous
21355 instruction is not needed to compute the address. */
21356 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21357 && !ix86_agi_dependent (dep_insn, insn))
21358 {
21359 /* Claim moves to take one cycle, as the core can issue one load
21360 at a time and the next load can start a cycle later. */
21361 if (dep_insn_type == TYPE_IMOV
21362 || dep_insn_type == TYPE_FMOV)
21363 cost = 1;
21364 else if (cost > 2)
21365 cost -= 2;
21366 else
21367 cost = 1;
21368 }
21369 break;
21370
21371 case PROCESSOR_ATHLON:
21372 case PROCESSOR_K8:
21373 case PROCESSOR_AMDFAM10:
21374 case PROCESSOR_BDVER1:
21375 case PROCESSOR_ATOM:
21376 case PROCESSOR_GENERIC32:
21377 case PROCESSOR_GENERIC64:
21378 memory = get_attr_memory (insn);
21379
21380 /* Show the ability of the reorder buffer to hide the latency of a load by
21381 executing it in parallel with the previous instruction when the previous
21382 instruction is not needed to compute the address. */
21383 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21384 && !ix86_agi_dependent (dep_insn, insn))
21385 {
21386 enum attr_unit unit = get_attr_unit (insn);
21387 int loadcost = 3;
21388
21389 /* Because of the difference between the lengths of the integer and
21390 floating unit pipeline preparation stages, the memory operands
21391 for floating point are cheaper.
21392
21393 ??? For Athlon the difference is most probably 2. */
21394 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
21395 loadcost = 3;
21396 else
21397 loadcost = TARGET_ATHLON ? 2 : 0;
21398
21399 if (cost >= loadcost)
21400 cost -= loadcost;
21401 else
21402 cost = 0;
21403 }
21404
21405 default:
21406 break;
21407 }
21408
21409 return cost;
21410 }
21411
21412 /* How many alternative schedules to try. This should be as wide as the
21413 scheduling freedom in the DFA, but no wider. Making this value too
21414 large results in extra work for the scheduler. */
21415
21416 static int
21417 ia32_multipass_dfa_lookahead (void)
21418 {
21419 switch (ix86_tune)
21420 {
21421 case PROCESSOR_PENTIUM:
21422 return 2;
21423
21424 case PROCESSOR_PENTIUMPRO:
21425 case PROCESSOR_K6:
21426 return 1;
21427
21428 default:
21429 return 0;
21430 }
21431 }
21432
21433 \f
21434 /* Compute the alignment given to a constant that is being placed in memory.
21435 EXP is the constant and ALIGN is the alignment that the object would
21436 ordinarily have.
21437 The value of this function is used instead of that alignment to align
21438 the object. */
21439
21440 int
21441 ix86_constant_alignment (tree exp, int align)
21442 {
21443 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
21444 || TREE_CODE (exp) == INTEGER_CST)
21445 {
21446 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
21447 return 64;
21448 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
21449 return 128;
21450 }
21451 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
21452 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
21453 return BITS_PER_WORD;
21454
21455 return align;
21456 }
21457
21458 /* Compute the alignment for a static variable.
21459 TYPE is the data type, and ALIGN is the alignment that
21460 the object would ordinarily have. The value of this function is used
21461 instead of that alignment to align the object. */
21462
21463 int
21464 ix86_data_alignment (tree type, int align)
21465 {
21466 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
21467
21468 if (AGGREGATE_TYPE_P (type)
21469 && TYPE_SIZE (type)
21470 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21471 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
21472 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
21473 && align < max_align)
21474 align = max_align;
21475
21476 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
21477 to a 16-byte boundary. */
21478 if (TARGET_64BIT)
21479 {
21480 if (AGGREGATE_TYPE_P (type)
21481 && TYPE_SIZE (type)
21482 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21483 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
21484 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21485 return 128;
21486 }
21487
21488 if (TREE_CODE (type) == ARRAY_TYPE)
21489 {
21490 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21491 return 64;
21492 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21493 return 128;
21494 }
21495 else if (TREE_CODE (type) == COMPLEX_TYPE)
21496 {
21497
21498 if (TYPE_MODE (type) == DCmode && align < 64)
21499 return 64;
21500 if ((TYPE_MODE (type) == XCmode
21501 || TYPE_MODE (type) == TCmode) && align < 128)
21502 return 128;
21503 }
21504 else if ((TREE_CODE (type) == RECORD_TYPE
21505 || TREE_CODE (type) == UNION_TYPE
21506 || TREE_CODE (type) == QUAL_UNION_TYPE)
21507 && TYPE_FIELDS (type))
21508 {
21509 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21510 return 64;
21511 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21512 return 128;
21513 }
21514 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21515 || TREE_CODE (type) == INTEGER_TYPE)
21516 {
21517 if (TYPE_MODE (type) == DFmode && align < 64)
21518 return 64;
21519 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21520 return 128;
21521 }
21522
21523 return align;
21524 }
21525
21526 /* Compute the alignment for a local variable or a stack slot. EXP is
21527 the data type or decl itself, MODE is the widest mode available and
21528 ALIGN is the alignment that the object would ordinarily have. The
21529 value of this macro is used instead of that alignment to align the
21530 object. */
21531
21532 unsigned int
21533 ix86_local_alignment (tree exp, enum machine_mode mode,
21534 unsigned int align)
21535 {
21536 tree type, decl;
21537
21538 if (exp && DECL_P (exp))
21539 {
21540 type = TREE_TYPE (exp);
21541 decl = exp;
21542 }
21543 else
21544 {
21545 type = exp;
21546 decl = NULL;
21547 }
21548
21549 /* Don't do dynamic stack realignment for long long objects with
21550 -mpreferred-stack-boundary=2. */
21551 if (!TARGET_64BIT
21552 && align == 64
21553 && ix86_preferred_stack_boundary < 64
21554 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
21555 && (!type || !TYPE_USER_ALIGN (type))
21556 && (!decl || !DECL_USER_ALIGN (decl)))
21557 align = 32;
21558
21559 /* If TYPE is NULL, we are allocating a stack slot for caller-save
21560 register in MODE. We will return the largest alignment of XF
21561 and DF. */
21562 if (!type)
21563 {
21564 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
21565 align = GET_MODE_ALIGNMENT (DFmode);
21566 return align;
21567 }
21568
21569 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
21570 to a 16-byte boundary. The exact wording is:
21571
21572 An array uses the same alignment as its elements, except that a local or
21573 global array variable of length at least 16 bytes or
21574 a C99 variable-length array variable always has alignment of at least 16 bytes.
21575
21576 This was added to allow use of aligned SSE instructions on arrays. The
21577 rule is meant for static storage (where the compiler cannot do the analysis
21578 by itself). We follow it for automatic variables only when convenient;
21579 we fully control everything in the function being compiled, and code in
21580 other units cannot rely on the alignment.
21581
21582 Exclude the va_list type. It is the common case of a local array where
21583 we cannot benefit from the alignment. */
21584 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
21585 && TARGET_SSE)
21586 {
21587 if (AGGREGATE_TYPE_P (type)
21588 && (TYPE_MAIN_VARIANT (type)
21589 != TYPE_MAIN_VARIANT (va_list_type_node))
21590 && TYPE_SIZE (type)
21591 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21592 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
21593 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21594 return 128;
21595 }
21596 if (TREE_CODE (type) == ARRAY_TYPE)
21597 {
21598 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21599 return 64;
21600 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21601 return 128;
21602 }
21603 else if (TREE_CODE (type) == COMPLEX_TYPE)
21604 {
21605 if (TYPE_MODE (type) == DCmode && align < 64)
21606 return 64;
21607 if ((TYPE_MODE (type) == XCmode
21608 || TYPE_MODE (type) == TCmode) && align < 128)
21609 return 128;
21610 }
21611 else if ((TREE_CODE (type) == RECORD_TYPE
21612 || TREE_CODE (type) == UNION_TYPE
21613 || TREE_CODE (type) == QUAL_UNION_TYPE)
21614 && TYPE_FIELDS (type))
21615 {
21616 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21617 return 64;
21618 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21619 return 128;
21620 }
21621 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21622 || TREE_CODE (type) == INTEGER_TYPE)
21623 {
21624
21625 if (TYPE_MODE (type) == DFmode && align < 64)
21626 return 64;
21627 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21628 return 128;
21629 }
21630 return align;
21631 }
21632
21633 /* Compute the minimum required alignment for dynamic stack realignment
21634 purposes for a local variable, parameter or a stack slot. EXP is
21635 the data type or decl itself, MODE is its mode and ALIGN is the
21636 alignment that the object would ordinarily have. */
21637
21638 unsigned int
21639 ix86_minimum_alignment (tree exp, enum machine_mode mode,
21640 unsigned int align)
21641 {
21642 tree type, decl;
21643
21644 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
21645 return align;
21646
21647 if (exp && DECL_P (exp))
21648 {
21649 type = TREE_TYPE (exp);
21650 decl = exp;
21651 }
21652 else
21653 {
21654 type = exp;
21655 decl = NULL;
21656 }
21657
21658 /* Don't do dynamic stack realignment for long long objects with
21659 -mpreferred-stack-boundary=2. */
21660 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
21661 && (!type || !TYPE_USER_ALIGN (type))
21662 && (!decl || !DECL_USER_ALIGN (decl)))
21663 return 32;
21664
21665 return align;
21666 }
21667 \f
21668 /* Find a location for the static chain incoming to a nested function.
21669 This is a register, unless all free registers are used by arguments. */
21670
21671 static rtx
21672 ix86_static_chain (const_tree fndecl, bool incoming_p)
21673 {
21674 unsigned regno;
21675
21676 if (!DECL_STATIC_CHAIN (fndecl))
21677 return NULL;
21678
21679 if (TARGET_64BIT)
21680 {
21681 /* We always use R10 in 64-bit mode. */
21682 regno = R10_REG;
21683 }
21684 else
21685 {
21686 tree fntype;
21687 /* By default in 32-bit mode we use ECX to pass the static chain. */
21688 regno = CX_REG;
21689
21690 fntype = TREE_TYPE (fndecl);
21691 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
21692 {
21693 /* Fastcall functions use ecx/edx for arguments, which leaves
21694 us with EAX for the static chain. */
21695 regno = AX_REG;
21696 }
21697 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
21698 {
21699 /* Thiscall functions use ecx for arguments, which leaves
21700 us with EAX for the static chain. */
21701 regno = AX_REG;
21702 }
21703 else if (ix86_function_regparm (fntype, fndecl) == 3)
21704 {
21705 /* For regparm 3, we have no free call-clobbered registers in
21706 which to store the static chain. In order to implement this,
21707 we have the trampoline push the static chain to the stack.
21708 However, we can't push a value below the return address when
21709 we call the nested function directly, so we have to use an
21710 alternate entry point. For this we use ESI, and have the
21711 alternate entry point push ESI, so that things appear the
21712 same once we're executing the nested function. */
21713 if (incoming_p)
21714 {
21715 if (fndecl == current_function_decl)
21716 ix86_static_chain_on_stack = true;
21717 return gen_frame_mem (SImode,
21718 plus_constant (arg_pointer_rtx, -8));
21719 }
21720 regno = SI_REG;
21721 }
21722 }
21723
21724 return gen_rtx_REG (Pmode, regno);
21725 }
21726
21727 /* Emit RTL insns to initialize the variable parts of a trampoline.
21728 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21729 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21730 to be passed to the target function. */
21731
21732 static void
21733 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21734 {
21735 rtx mem, fnaddr;
21736
21737 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21738
21739 if (!TARGET_64BIT)
21740 {
21741 rtx disp, chain;
21742 int opcode;
21743
21744 /* Depending on the static chain location, either load a register
21745 with a constant, or push the constant to the stack. All of the
21746 instructions are the same size. */
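/* The opcodes used here are: 0xb9 = movl $imm32, %ecx;
   0xb8 = movl $imm32, %eax; 0x68 = pushl $imm32; and, further down,
   0xe9 = jmp rel32.  */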
21747 chain = ix86_static_chain (fndecl, true);
21748 if (REG_P (chain))
21749 {
21750 if (REGNO (chain) == CX_REG)
21751 opcode = 0xb9;
21752 else if (REGNO (chain) == AX_REG)
21753 opcode = 0xb8;
21754 else
21755 gcc_unreachable ();
21756 }
21757 else
21758 opcode = 0x68;
21759
21760 mem = adjust_address (m_tramp, QImode, 0);
21761 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21762
21763 mem = adjust_address (m_tramp, SImode, 1);
21764 emit_move_insn (mem, chain_value);
21765
21766 /* Compute offset from the end of the jmp to the target function.
21767 In the case in which the trampoline stores the static chain on
21768 the stack, we need to skip the first insn which pushes the
21769 (call-saved) register static chain; this push is 1 byte. */
21770 disp = expand_binop (SImode, sub_optab, fnaddr,
21771 plus_constant (XEXP (m_tramp, 0),
21772 MEM_P (chain) ? 9 : 10),
21773 NULL_RTX, 1, OPTAB_DIRECT);
21774
21775 mem = adjust_address (m_tramp, QImode, 5);
21776 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21777
21778 mem = adjust_address (m_tramp, SImode, 6);
21779 emit_move_insn (mem, disp);
21780 }
21781 else
21782 {
21783 int offset = 0;
21784
21785 /* Load the function address into r11.  Try to load the address using
21786 the shorter movl instead of movabs.  We may want to support
21787 movq for kernel mode, but the kernel does not use trampolines at
21788 the moment. */
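/* A sketch of the 64-bit trampoline these stores produce (bytes as
   written below, little-endian):
     41 bb <imm32>   movl   $fnaddr,%r11d    (6 bytes, when fnaddr
                                              fits in 32 bits)
   or
     49 bb <imm64>   movabs $fnaddr,%r11     (10 bytes)
   then
     49 ba <imm64>   movabs $chain,%r10      (10 bytes)
     49 ff e3        jmp    *%r11
     90              nop                     (padding)  */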
21789 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21790 {
21791 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21792
21793 mem = adjust_address (m_tramp, HImode, offset);
21794 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21795
21796 mem = adjust_address (m_tramp, SImode, offset + 2);
21797 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21798 offset += 6;
21799 }
21800 else
21801 {
21802 mem = adjust_address (m_tramp, HImode, offset);
21803 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21804
21805 mem = adjust_address (m_tramp, DImode, offset + 2);
21806 emit_move_insn (mem, fnaddr);
21807 offset += 10;
21808 }
21809
21810 /* Load static chain using movabs to r10. */
21811 mem = adjust_address (m_tramp, HImode, offset);
21812 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21813
21814 mem = adjust_address (m_tramp, DImode, offset + 2);
21815 emit_move_insn (mem, chain_value);
21816 offset += 10;
21817
21818 /* Jump to r11; the last (unused) byte is a nop, only there to
21819 pad the write out to a single 32-bit store. */
21820 mem = adjust_address (m_tramp, SImode, offset);
21821 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
21822 offset += 4;
21823
21824 gcc_assert (offset <= TRAMPOLINE_SIZE);
21825 }
21826
21827 #ifdef ENABLE_EXECUTE_STACK
21828 #ifdef CHECK_EXECUTE_STACK_ENABLED
21829 if (CHECK_EXECUTE_STACK_ENABLED)
21830 #endif
21831 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21832 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
21833 #endif
21834 }
21835 \f
21836 /* The following file contains several enumerations and data structures
21837 built from the definitions in i386-builtin-types.def. */
21838
21839 #include "i386-builtin-types.inc"
21840
21841 /* Table for the ix86 builtin non-function types. */
21842 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21843
21844 /* Retrieve an element from the above table, building some of
21845 the types lazily. */
21846
21847 static tree
21848 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21849 {
21850 unsigned int index;
21851 tree type, itype;
21852
21853 gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_type_tab));
21854
21855 type = ix86_builtin_type_tab[(int) tcode];
21856 if (type != NULL)
21857 return type;
21858
21859 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21860 if (tcode <= IX86_BT_LAST_VECT)
21861 {
21862 enum machine_mode mode;
21863
21864 index = tcode - IX86_BT_LAST_PRIM - 1;
21865 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21866 mode = ix86_builtin_type_vect_mode[index];
21867
21868 type = build_vector_type_for_mode (itype, mode);
21869 }
21870 else
21871 {
21872 int quals;
21873
21874 index = tcode - IX86_BT_LAST_VECT - 1;
21875 if (tcode <= IX86_BT_LAST_PTR)
21876 quals = TYPE_UNQUALIFIED;
21877 else
21878 quals = TYPE_QUAL_CONST;
21879
21880 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21881 if (quals != TYPE_UNQUALIFIED)
21882 itype = build_qualified_type (itype, quals);
21883
21884 type = build_pointer_type (itype);
21885 }
21886
21887 ix86_builtin_type_tab[(int) tcode] = type;
21888 return type;
21889 }
21890
21891 /* Table for the ix86 builtin function types. */
21892 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21893
21894 /* Retrieve an element from the above table, building some of
21895 the types lazily. */
21896
21897 static tree
21898 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21899 {
21900 tree type;
21901
21902 gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
21903
21904 type = ix86_builtin_func_type_tab[(int) tcode];
21905 if (type != NULL)
21906 return type;
21907
21908 if (tcode <= IX86_BT_LAST_FUNC)
21909 {
21910 unsigned start = ix86_builtin_func_start[(int) tcode];
21911 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21912 tree rtype, atype, args = void_list_node;
21913 unsigned i;
21914
21915 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
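/* Entry START is the return type; the remaining entries are the
   argument types, walked in reverse so that the resulting
   TREE_LIST comes out in declaration order. */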
21916 for (i = after - 1; i > start; --i)
21917 {
21918 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
21919 args = tree_cons (NULL, atype, args);
21920 }
21921
21922 type = build_function_type (rtype, args);
21923 }
21924 else
21925 {
21926 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
21927 enum ix86_builtin_func_type icode;
21928
21929 icode = ix86_builtin_func_alias_base[index];
21930 type = ix86_get_builtin_func_type (icode);
21931 }
21932
21933 ix86_builtin_func_type_tab[(int) tcode] = type;
21934 return type;
21935 }
21936
21937
21938 /* Codes for all the SSE/MMX builtins. */
21939 enum ix86_builtins
21940 {
21941 IX86_BUILTIN_ADDPS,
21942 IX86_BUILTIN_ADDSS,
21943 IX86_BUILTIN_DIVPS,
21944 IX86_BUILTIN_DIVSS,
21945 IX86_BUILTIN_MULPS,
21946 IX86_BUILTIN_MULSS,
21947 IX86_BUILTIN_SUBPS,
21948 IX86_BUILTIN_SUBSS,
21949
21950 IX86_BUILTIN_CMPEQPS,
21951 IX86_BUILTIN_CMPLTPS,
21952 IX86_BUILTIN_CMPLEPS,
21953 IX86_BUILTIN_CMPGTPS,
21954 IX86_BUILTIN_CMPGEPS,
21955 IX86_BUILTIN_CMPNEQPS,
21956 IX86_BUILTIN_CMPNLTPS,
21957 IX86_BUILTIN_CMPNLEPS,
21958 IX86_BUILTIN_CMPNGTPS,
21959 IX86_BUILTIN_CMPNGEPS,
21960 IX86_BUILTIN_CMPORDPS,
21961 IX86_BUILTIN_CMPUNORDPS,
21962 IX86_BUILTIN_CMPEQSS,
21963 IX86_BUILTIN_CMPLTSS,
21964 IX86_BUILTIN_CMPLESS,
21965 IX86_BUILTIN_CMPNEQSS,
21966 IX86_BUILTIN_CMPNLTSS,
21967 IX86_BUILTIN_CMPNLESS,
21968 IX86_BUILTIN_CMPNGTSS,
21969 IX86_BUILTIN_CMPNGESS,
21970 IX86_BUILTIN_CMPORDSS,
21971 IX86_BUILTIN_CMPUNORDSS,
21972
21973 IX86_BUILTIN_COMIEQSS,
21974 IX86_BUILTIN_COMILTSS,
21975 IX86_BUILTIN_COMILESS,
21976 IX86_BUILTIN_COMIGTSS,
21977 IX86_BUILTIN_COMIGESS,
21978 IX86_BUILTIN_COMINEQSS,
21979 IX86_BUILTIN_UCOMIEQSS,
21980 IX86_BUILTIN_UCOMILTSS,
21981 IX86_BUILTIN_UCOMILESS,
21982 IX86_BUILTIN_UCOMIGTSS,
21983 IX86_BUILTIN_UCOMIGESS,
21984 IX86_BUILTIN_UCOMINEQSS,
21985
21986 IX86_BUILTIN_CVTPI2PS,
21987 IX86_BUILTIN_CVTPS2PI,
21988 IX86_BUILTIN_CVTSI2SS,
21989 IX86_BUILTIN_CVTSI642SS,
21990 IX86_BUILTIN_CVTSS2SI,
21991 IX86_BUILTIN_CVTSS2SI64,
21992 IX86_BUILTIN_CVTTPS2PI,
21993 IX86_BUILTIN_CVTTSS2SI,
21994 IX86_BUILTIN_CVTTSS2SI64,
21995
21996 IX86_BUILTIN_MAXPS,
21997 IX86_BUILTIN_MAXSS,
21998 IX86_BUILTIN_MINPS,
21999 IX86_BUILTIN_MINSS,
22000
22001 IX86_BUILTIN_LOADUPS,
22002 IX86_BUILTIN_STOREUPS,
22003 IX86_BUILTIN_MOVSS,
22004
22005 IX86_BUILTIN_MOVHLPS,
22006 IX86_BUILTIN_MOVLHPS,
22007 IX86_BUILTIN_LOADHPS,
22008 IX86_BUILTIN_LOADLPS,
22009 IX86_BUILTIN_STOREHPS,
22010 IX86_BUILTIN_STORELPS,
22011
22012 IX86_BUILTIN_MASKMOVQ,
22013 IX86_BUILTIN_MOVMSKPS,
22014 IX86_BUILTIN_PMOVMSKB,
22015
22016 IX86_BUILTIN_MOVNTPS,
22017 IX86_BUILTIN_MOVNTQ,
22018
22019 IX86_BUILTIN_LOADDQU,
22020 IX86_BUILTIN_STOREDQU,
22021
22022 IX86_BUILTIN_PACKSSWB,
22023 IX86_BUILTIN_PACKSSDW,
22024 IX86_BUILTIN_PACKUSWB,
22025
22026 IX86_BUILTIN_PADDB,
22027 IX86_BUILTIN_PADDW,
22028 IX86_BUILTIN_PADDD,
22029 IX86_BUILTIN_PADDQ,
22030 IX86_BUILTIN_PADDSB,
22031 IX86_BUILTIN_PADDSW,
22032 IX86_BUILTIN_PADDUSB,
22033 IX86_BUILTIN_PADDUSW,
22034 IX86_BUILTIN_PSUBB,
22035 IX86_BUILTIN_PSUBW,
22036 IX86_BUILTIN_PSUBD,
22037 IX86_BUILTIN_PSUBQ,
22038 IX86_BUILTIN_PSUBSB,
22039 IX86_BUILTIN_PSUBSW,
22040 IX86_BUILTIN_PSUBUSB,
22041 IX86_BUILTIN_PSUBUSW,
22042
22043 IX86_BUILTIN_PAND,
22044 IX86_BUILTIN_PANDN,
22045 IX86_BUILTIN_POR,
22046 IX86_BUILTIN_PXOR,
22047
22048 IX86_BUILTIN_PAVGB,
22049 IX86_BUILTIN_PAVGW,
22050
22051 IX86_BUILTIN_PCMPEQB,
22052 IX86_BUILTIN_PCMPEQW,
22053 IX86_BUILTIN_PCMPEQD,
22054 IX86_BUILTIN_PCMPGTB,
22055 IX86_BUILTIN_PCMPGTW,
22056 IX86_BUILTIN_PCMPGTD,
22057
22058 IX86_BUILTIN_PMADDWD,
22059
22060 IX86_BUILTIN_PMAXSW,
22061 IX86_BUILTIN_PMAXUB,
22062 IX86_BUILTIN_PMINSW,
22063 IX86_BUILTIN_PMINUB,
22064
22065 IX86_BUILTIN_PMULHUW,
22066 IX86_BUILTIN_PMULHW,
22067 IX86_BUILTIN_PMULLW,
22068
22069 IX86_BUILTIN_PSADBW,
22070 IX86_BUILTIN_PSHUFW,
22071
22072 IX86_BUILTIN_PSLLW,
22073 IX86_BUILTIN_PSLLD,
22074 IX86_BUILTIN_PSLLQ,
22075 IX86_BUILTIN_PSRAW,
22076 IX86_BUILTIN_PSRAD,
22077 IX86_BUILTIN_PSRLW,
22078 IX86_BUILTIN_PSRLD,
22079 IX86_BUILTIN_PSRLQ,
22080 IX86_BUILTIN_PSLLWI,
22081 IX86_BUILTIN_PSLLDI,
22082 IX86_BUILTIN_PSLLQI,
22083 IX86_BUILTIN_PSRAWI,
22084 IX86_BUILTIN_PSRADI,
22085 IX86_BUILTIN_PSRLWI,
22086 IX86_BUILTIN_PSRLDI,
22087 IX86_BUILTIN_PSRLQI,
22088
22089 IX86_BUILTIN_PUNPCKHBW,
22090 IX86_BUILTIN_PUNPCKHWD,
22091 IX86_BUILTIN_PUNPCKHDQ,
22092 IX86_BUILTIN_PUNPCKLBW,
22093 IX86_BUILTIN_PUNPCKLWD,
22094 IX86_BUILTIN_PUNPCKLDQ,
22095
22096 IX86_BUILTIN_SHUFPS,
22097
22098 IX86_BUILTIN_RCPPS,
22099 IX86_BUILTIN_RCPSS,
22100 IX86_BUILTIN_RSQRTPS,
22101 IX86_BUILTIN_RSQRTPS_NR,
22102 IX86_BUILTIN_RSQRTSS,
22103 IX86_BUILTIN_RSQRTF,
22104 IX86_BUILTIN_SQRTPS,
22105 IX86_BUILTIN_SQRTPS_NR,
22106 IX86_BUILTIN_SQRTSS,
22107
22108 IX86_BUILTIN_UNPCKHPS,
22109 IX86_BUILTIN_UNPCKLPS,
22110
22111 IX86_BUILTIN_ANDPS,
22112 IX86_BUILTIN_ANDNPS,
22113 IX86_BUILTIN_ORPS,
22114 IX86_BUILTIN_XORPS,
22115
22116 IX86_BUILTIN_EMMS,
22117 IX86_BUILTIN_LDMXCSR,
22118 IX86_BUILTIN_STMXCSR,
22119 IX86_BUILTIN_SFENCE,
22120
22121 /* 3DNow! Original */
22122 IX86_BUILTIN_FEMMS,
22123 IX86_BUILTIN_PAVGUSB,
22124 IX86_BUILTIN_PF2ID,
22125 IX86_BUILTIN_PFACC,
22126 IX86_BUILTIN_PFADD,
22127 IX86_BUILTIN_PFCMPEQ,
22128 IX86_BUILTIN_PFCMPGE,
22129 IX86_BUILTIN_PFCMPGT,
22130 IX86_BUILTIN_PFMAX,
22131 IX86_BUILTIN_PFMIN,
22132 IX86_BUILTIN_PFMUL,
22133 IX86_BUILTIN_PFRCP,
22134 IX86_BUILTIN_PFRCPIT1,
22135 IX86_BUILTIN_PFRCPIT2,
22136 IX86_BUILTIN_PFRSQIT1,
22137 IX86_BUILTIN_PFRSQRT,
22138 IX86_BUILTIN_PFSUB,
22139 IX86_BUILTIN_PFSUBR,
22140 IX86_BUILTIN_PI2FD,
22141 IX86_BUILTIN_PMULHRW,
22142
22143 /* 3DNow! Athlon Extensions */
22144 IX86_BUILTIN_PF2IW,
22145 IX86_BUILTIN_PFNACC,
22146 IX86_BUILTIN_PFPNACC,
22147 IX86_BUILTIN_PI2FW,
22148 IX86_BUILTIN_PSWAPDSI,
22149 IX86_BUILTIN_PSWAPDSF,
22150
22151 /* SSE2 */
22152 IX86_BUILTIN_ADDPD,
22153 IX86_BUILTIN_ADDSD,
22154 IX86_BUILTIN_DIVPD,
22155 IX86_BUILTIN_DIVSD,
22156 IX86_BUILTIN_MULPD,
22157 IX86_BUILTIN_MULSD,
22158 IX86_BUILTIN_SUBPD,
22159 IX86_BUILTIN_SUBSD,
22160
22161 IX86_BUILTIN_CMPEQPD,
22162 IX86_BUILTIN_CMPLTPD,
22163 IX86_BUILTIN_CMPLEPD,
22164 IX86_BUILTIN_CMPGTPD,
22165 IX86_BUILTIN_CMPGEPD,
22166 IX86_BUILTIN_CMPNEQPD,
22167 IX86_BUILTIN_CMPNLTPD,
22168 IX86_BUILTIN_CMPNLEPD,
22169 IX86_BUILTIN_CMPNGTPD,
22170 IX86_BUILTIN_CMPNGEPD,
22171 IX86_BUILTIN_CMPORDPD,
22172 IX86_BUILTIN_CMPUNORDPD,
22173 IX86_BUILTIN_CMPEQSD,
22174 IX86_BUILTIN_CMPLTSD,
22175 IX86_BUILTIN_CMPLESD,
22176 IX86_BUILTIN_CMPNEQSD,
22177 IX86_BUILTIN_CMPNLTSD,
22178 IX86_BUILTIN_CMPNLESD,
22179 IX86_BUILTIN_CMPORDSD,
22180 IX86_BUILTIN_CMPUNORDSD,
22181
22182 IX86_BUILTIN_COMIEQSD,
22183 IX86_BUILTIN_COMILTSD,
22184 IX86_BUILTIN_COMILESD,
22185 IX86_BUILTIN_COMIGTSD,
22186 IX86_BUILTIN_COMIGESD,
22187 IX86_BUILTIN_COMINEQSD,
22188 IX86_BUILTIN_UCOMIEQSD,
22189 IX86_BUILTIN_UCOMILTSD,
22190 IX86_BUILTIN_UCOMILESD,
22191 IX86_BUILTIN_UCOMIGTSD,
22192 IX86_BUILTIN_UCOMIGESD,
22193 IX86_BUILTIN_UCOMINEQSD,
22194
22195 IX86_BUILTIN_MAXPD,
22196 IX86_BUILTIN_MAXSD,
22197 IX86_BUILTIN_MINPD,
22198 IX86_BUILTIN_MINSD,
22199
22200 IX86_BUILTIN_ANDPD,
22201 IX86_BUILTIN_ANDNPD,
22202 IX86_BUILTIN_ORPD,
22203 IX86_BUILTIN_XORPD,
22204
22205 IX86_BUILTIN_SQRTPD,
22206 IX86_BUILTIN_SQRTSD,
22207
22208 IX86_BUILTIN_UNPCKHPD,
22209 IX86_BUILTIN_UNPCKLPD,
22210
22211 IX86_BUILTIN_SHUFPD,
22212
22213 IX86_BUILTIN_LOADUPD,
22214 IX86_BUILTIN_STOREUPD,
22215 IX86_BUILTIN_MOVSD,
22216
22217 IX86_BUILTIN_LOADHPD,
22218 IX86_BUILTIN_LOADLPD,
22219
22220 IX86_BUILTIN_CVTDQ2PD,
22221 IX86_BUILTIN_CVTDQ2PS,
22222
22223 IX86_BUILTIN_CVTPD2DQ,
22224 IX86_BUILTIN_CVTPD2PI,
22225 IX86_BUILTIN_CVTPD2PS,
22226 IX86_BUILTIN_CVTTPD2DQ,
22227 IX86_BUILTIN_CVTTPD2PI,
22228
22229 IX86_BUILTIN_CVTPI2PD,
22230 IX86_BUILTIN_CVTSI2SD,
22231 IX86_BUILTIN_CVTSI642SD,
22232
22233 IX86_BUILTIN_CVTSD2SI,
22234 IX86_BUILTIN_CVTSD2SI64,
22235 IX86_BUILTIN_CVTSD2SS,
22236 IX86_BUILTIN_CVTSS2SD,
22237 IX86_BUILTIN_CVTTSD2SI,
22238 IX86_BUILTIN_CVTTSD2SI64,
22239
22240 IX86_BUILTIN_CVTPS2DQ,
22241 IX86_BUILTIN_CVTPS2PD,
22242 IX86_BUILTIN_CVTTPS2DQ,
22243
22244 IX86_BUILTIN_MOVNTI,
22245 IX86_BUILTIN_MOVNTPD,
22246 IX86_BUILTIN_MOVNTDQ,
22247
22248 IX86_BUILTIN_MOVQ128,
22249
22250 /* SSE2 MMX */
22251 IX86_BUILTIN_MASKMOVDQU,
22252 IX86_BUILTIN_MOVMSKPD,
22253 IX86_BUILTIN_PMOVMSKB128,
22254
22255 IX86_BUILTIN_PACKSSWB128,
22256 IX86_BUILTIN_PACKSSDW128,
22257 IX86_BUILTIN_PACKUSWB128,
22258
22259 IX86_BUILTIN_PADDB128,
22260 IX86_BUILTIN_PADDW128,
22261 IX86_BUILTIN_PADDD128,
22262 IX86_BUILTIN_PADDQ128,
22263 IX86_BUILTIN_PADDSB128,
22264 IX86_BUILTIN_PADDSW128,
22265 IX86_BUILTIN_PADDUSB128,
22266 IX86_BUILTIN_PADDUSW128,
22267 IX86_BUILTIN_PSUBB128,
22268 IX86_BUILTIN_PSUBW128,
22269 IX86_BUILTIN_PSUBD128,
22270 IX86_BUILTIN_PSUBQ128,
22271 IX86_BUILTIN_PSUBSB128,
22272 IX86_BUILTIN_PSUBSW128,
22273 IX86_BUILTIN_PSUBUSB128,
22274 IX86_BUILTIN_PSUBUSW128,
22275
22276 IX86_BUILTIN_PAND128,
22277 IX86_BUILTIN_PANDN128,
22278 IX86_BUILTIN_POR128,
22279 IX86_BUILTIN_PXOR128,
22280
22281 IX86_BUILTIN_PAVGB128,
22282 IX86_BUILTIN_PAVGW128,
22283
22284 IX86_BUILTIN_PCMPEQB128,
22285 IX86_BUILTIN_PCMPEQW128,
22286 IX86_BUILTIN_PCMPEQD128,
22287 IX86_BUILTIN_PCMPGTB128,
22288 IX86_BUILTIN_PCMPGTW128,
22289 IX86_BUILTIN_PCMPGTD128,
22290
22291 IX86_BUILTIN_PMADDWD128,
22292
22293 IX86_BUILTIN_PMAXSW128,
22294 IX86_BUILTIN_PMAXUB128,
22295 IX86_BUILTIN_PMINSW128,
22296 IX86_BUILTIN_PMINUB128,
22297
22298 IX86_BUILTIN_PMULUDQ,
22299 IX86_BUILTIN_PMULUDQ128,
22300 IX86_BUILTIN_PMULHUW128,
22301 IX86_BUILTIN_PMULHW128,
22302 IX86_BUILTIN_PMULLW128,
22303
22304 IX86_BUILTIN_PSADBW128,
22305 IX86_BUILTIN_PSHUFHW,
22306 IX86_BUILTIN_PSHUFLW,
22307 IX86_BUILTIN_PSHUFD,
22308
22309 IX86_BUILTIN_PSLLDQI128,
22310 IX86_BUILTIN_PSLLWI128,
22311 IX86_BUILTIN_PSLLDI128,
22312 IX86_BUILTIN_PSLLQI128,
22313 IX86_BUILTIN_PSRAWI128,
22314 IX86_BUILTIN_PSRADI128,
22315 IX86_BUILTIN_PSRLDQI128,
22316 IX86_BUILTIN_PSRLWI128,
22317 IX86_BUILTIN_PSRLDI128,
22318 IX86_BUILTIN_PSRLQI128,
22319
22320 IX86_BUILTIN_PSLLDQ128,
22321 IX86_BUILTIN_PSLLW128,
22322 IX86_BUILTIN_PSLLD128,
22323 IX86_BUILTIN_PSLLQ128,
22324 IX86_BUILTIN_PSRAW128,
22325 IX86_BUILTIN_PSRAD128,
22326 IX86_BUILTIN_PSRLW128,
22327 IX86_BUILTIN_PSRLD128,
22328 IX86_BUILTIN_PSRLQ128,
22329
22330 IX86_BUILTIN_PUNPCKHBW128,
22331 IX86_BUILTIN_PUNPCKHWD128,
22332 IX86_BUILTIN_PUNPCKHDQ128,
22333 IX86_BUILTIN_PUNPCKHQDQ128,
22334 IX86_BUILTIN_PUNPCKLBW128,
22335 IX86_BUILTIN_PUNPCKLWD128,
22336 IX86_BUILTIN_PUNPCKLDQ128,
22337 IX86_BUILTIN_PUNPCKLQDQ128,
22338
22339 IX86_BUILTIN_CLFLUSH,
22340 IX86_BUILTIN_MFENCE,
22341 IX86_BUILTIN_LFENCE,
22342
22343 IX86_BUILTIN_BSRSI,
22344 IX86_BUILTIN_BSRDI,
22345 IX86_BUILTIN_RDPMC,
22346 IX86_BUILTIN_RDTSC,
22347 IX86_BUILTIN_RDTSCP,
22348 IX86_BUILTIN_ROLQI,
22349 IX86_BUILTIN_ROLHI,
22350 IX86_BUILTIN_RORQI,
22351 IX86_BUILTIN_RORHI,
22352
22353 /* SSE3. */
22354 IX86_BUILTIN_ADDSUBPS,
22355 IX86_BUILTIN_HADDPS,
22356 IX86_BUILTIN_HSUBPS,
22357 IX86_BUILTIN_MOVSHDUP,
22358 IX86_BUILTIN_MOVSLDUP,
22359 IX86_BUILTIN_ADDSUBPD,
22360 IX86_BUILTIN_HADDPD,
22361 IX86_BUILTIN_HSUBPD,
22362 IX86_BUILTIN_LDDQU,
22363
22364 IX86_BUILTIN_MONITOR,
22365 IX86_BUILTIN_MWAIT,
22366
22367 /* SSSE3. */
22368 IX86_BUILTIN_PHADDW,
22369 IX86_BUILTIN_PHADDD,
22370 IX86_BUILTIN_PHADDSW,
22371 IX86_BUILTIN_PHSUBW,
22372 IX86_BUILTIN_PHSUBD,
22373 IX86_BUILTIN_PHSUBSW,
22374 IX86_BUILTIN_PMADDUBSW,
22375 IX86_BUILTIN_PMULHRSW,
22376 IX86_BUILTIN_PSHUFB,
22377 IX86_BUILTIN_PSIGNB,
22378 IX86_BUILTIN_PSIGNW,
22379 IX86_BUILTIN_PSIGND,
22380 IX86_BUILTIN_PALIGNR,
22381 IX86_BUILTIN_PABSB,
22382 IX86_BUILTIN_PABSW,
22383 IX86_BUILTIN_PABSD,
22384
22385 IX86_BUILTIN_PHADDW128,
22386 IX86_BUILTIN_PHADDD128,
22387 IX86_BUILTIN_PHADDSW128,
22388 IX86_BUILTIN_PHSUBW128,
22389 IX86_BUILTIN_PHSUBD128,
22390 IX86_BUILTIN_PHSUBSW128,
22391 IX86_BUILTIN_PMADDUBSW128,
22392 IX86_BUILTIN_PMULHRSW128,
22393 IX86_BUILTIN_PSHUFB128,
22394 IX86_BUILTIN_PSIGNB128,
22395 IX86_BUILTIN_PSIGNW128,
22396 IX86_BUILTIN_PSIGND128,
22397 IX86_BUILTIN_PALIGNR128,
22398 IX86_BUILTIN_PABSB128,
22399 IX86_BUILTIN_PABSW128,
22400 IX86_BUILTIN_PABSD128,
22401
22402 /* AMDFAM10 - SSE4A New Instructions. */
22403 IX86_BUILTIN_MOVNTSD,
22404 IX86_BUILTIN_MOVNTSS,
22405 IX86_BUILTIN_EXTRQI,
22406 IX86_BUILTIN_EXTRQ,
22407 IX86_BUILTIN_INSERTQI,
22408 IX86_BUILTIN_INSERTQ,
22409
22410 /* SSE4.1. */
22411 IX86_BUILTIN_BLENDPD,
22412 IX86_BUILTIN_BLENDPS,
22413 IX86_BUILTIN_BLENDVPD,
22414 IX86_BUILTIN_BLENDVPS,
22415 IX86_BUILTIN_PBLENDVB128,
22416 IX86_BUILTIN_PBLENDW128,
22417
22418 IX86_BUILTIN_DPPD,
22419 IX86_BUILTIN_DPPS,
22420
22421 IX86_BUILTIN_INSERTPS128,
22422
22423 IX86_BUILTIN_MOVNTDQA,
22424 IX86_BUILTIN_MPSADBW128,
22425 IX86_BUILTIN_PACKUSDW128,
22426 IX86_BUILTIN_PCMPEQQ,
22427 IX86_BUILTIN_PHMINPOSUW128,
22428
22429 IX86_BUILTIN_PMAXSB128,
22430 IX86_BUILTIN_PMAXSD128,
22431 IX86_BUILTIN_PMAXUD128,
22432 IX86_BUILTIN_PMAXUW128,
22433
22434 IX86_BUILTIN_PMINSB128,
22435 IX86_BUILTIN_PMINSD128,
22436 IX86_BUILTIN_PMINUD128,
22437 IX86_BUILTIN_PMINUW128,
22438
22439 IX86_BUILTIN_PMOVSXBW128,
22440 IX86_BUILTIN_PMOVSXBD128,
22441 IX86_BUILTIN_PMOVSXBQ128,
22442 IX86_BUILTIN_PMOVSXWD128,
22443 IX86_BUILTIN_PMOVSXWQ128,
22444 IX86_BUILTIN_PMOVSXDQ128,
22445
22446 IX86_BUILTIN_PMOVZXBW128,
22447 IX86_BUILTIN_PMOVZXBD128,
22448 IX86_BUILTIN_PMOVZXBQ128,
22449 IX86_BUILTIN_PMOVZXWD128,
22450 IX86_BUILTIN_PMOVZXWQ128,
22451 IX86_BUILTIN_PMOVZXDQ128,
22452
22453 IX86_BUILTIN_PMULDQ128,
22454 IX86_BUILTIN_PMULLD128,
22455
22456 IX86_BUILTIN_ROUNDPD,
22457 IX86_BUILTIN_ROUNDPS,
22458 IX86_BUILTIN_ROUNDSD,
22459 IX86_BUILTIN_ROUNDSS,
22460
22461 IX86_BUILTIN_PTESTZ,
22462 IX86_BUILTIN_PTESTC,
22463 IX86_BUILTIN_PTESTNZC,
22464
22465 IX86_BUILTIN_VEC_INIT_V2SI,
22466 IX86_BUILTIN_VEC_INIT_V4HI,
22467 IX86_BUILTIN_VEC_INIT_V8QI,
22468 IX86_BUILTIN_VEC_EXT_V2DF,
22469 IX86_BUILTIN_VEC_EXT_V2DI,
22470 IX86_BUILTIN_VEC_EXT_V4SF,
22471 IX86_BUILTIN_VEC_EXT_V4SI,
22472 IX86_BUILTIN_VEC_EXT_V8HI,
22473 IX86_BUILTIN_VEC_EXT_V2SI,
22474 IX86_BUILTIN_VEC_EXT_V4HI,
22475 IX86_BUILTIN_VEC_EXT_V16QI,
22476 IX86_BUILTIN_VEC_SET_V2DI,
22477 IX86_BUILTIN_VEC_SET_V4SF,
22478 IX86_BUILTIN_VEC_SET_V4SI,
22479 IX86_BUILTIN_VEC_SET_V8HI,
22480 IX86_BUILTIN_VEC_SET_V4HI,
22481 IX86_BUILTIN_VEC_SET_V16QI,
22482
22483 IX86_BUILTIN_VEC_PACK_SFIX,
22484
22485 /* SSE4.2. */
22486 IX86_BUILTIN_CRC32QI,
22487 IX86_BUILTIN_CRC32HI,
22488 IX86_BUILTIN_CRC32SI,
22489 IX86_BUILTIN_CRC32DI,
22490
22491 IX86_BUILTIN_PCMPESTRI128,
22492 IX86_BUILTIN_PCMPESTRM128,
22493 IX86_BUILTIN_PCMPESTRA128,
22494 IX86_BUILTIN_PCMPESTRC128,
22495 IX86_BUILTIN_PCMPESTRO128,
22496 IX86_BUILTIN_PCMPESTRS128,
22497 IX86_BUILTIN_PCMPESTRZ128,
22498 IX86_BUILTIN_PCMPISTRI128,
22499 IX86_BUILTIN_PCMPISTRM128,
22500 IX86_BUILTIN_PCMPISTRA128,
22501 IX86_BUILTIN_PCMPISTRC128,
22502 IX86_BUILTIN_PCMPISTRO128,
22503 IX86_BUILTIN_PCMPISTRS128,
22504 IX86_BUILTIN_PCMPISTRZ128,
22505
22506 IX86_BUILTIN_PCMPGTQ,
22507
22508 /* AES instructions */
22509 IX86_BUILTIN_AESENC128,
22510 IX86_BUILTIN_AESENCLAST128,
22511 IX86_BUILTIN_AESDEC128,
22512 IX86_BUILTIN_AESDECLAST128,
22513 IX86_BUILTIN_AESIMC128,
22514 IX86_BUILTIN_AESKEYGENASSIST128,
22515
22516 /* PCLMUL instruction */
22517 IX86_BUILTIN_PCLMULQDQ128,
22518
22519 /* AVX */
22520 IX86_BUILTIN_ADDPD256,
22521 IX86_BUILTIN_ADDPS256,
22522 IX86_BUILTIN_ADDSUBPD256,
22523 IX86_BUILTIN_ADDSUBPS256,
22524 IX86_BUILTIN_ANDPD256,
22525 IX86_BUILTIN_ANDPS256,
22526 IX86_BUILTIN_ANDNPD256,
22527 IX86_BUILTIN_ANDNPS256,
22528 IX86_BUILTIN_BLENDPD256,
22529 IX86_BUILTIN_BLENDPS256,
22530 IX86_BUILTIN_BLENDVPD256,
22531 IX86_BUILTIN_BLENDVPS256,
22532 IX86_BUILTIN_DIVPD256,
22533 IX86_BUILTIN_DIVPS256,
22534 IX86_BUILTIN_DPPS256,
22535 IX86_BUILTIN_HADDPD256,
22536 IX86_BUILTIN_HADDPS256,
22537 IX86_BUILTIN_HSUBPD256,
22538 IX86_BUILTIN_HSUBPS256,
22539 IX86_BUILTIN_MAXPD256,
22540 IX86_BUILTIN_MAXPS256,
22541 IX86_BUILTIN_MINPD256,
22542 IX86_BUILTIN_MINPS256,
22543 IX86_BUILTIN_MULPD256,
22544 IX86_BUILTIN_MULPS256,
22545 IX86_BUILTIN_ORPD256,
22546 IX86_BUILTIN_ORPS256,
22547 IX86_BUILTIN_SHUFPD256,
22548 IX86_BUILTIN_SHUFPS256,
22549 IX86_BUILTIN_SUBPD256,
22550 IX86_BUILTIN_SUBPS256,
22551 IX86_BUILTIN_XORPD256,
22552 IX86_BUILTIN_XORPS256,
22553 IX86_BUILTIN_CMPSD,
22554 IX86_BUILTIN_CMPSS,
22555 IX86_BUILTIN_CMPPD,
22556 IX86_BUILTIN_CMPPS,
22557 IX86_BUILTIN_CMPPD256,
22558 IX86_BUILTIN_CMPPS256,
22559 IX86_BUILTIN_CVTDQ2PD256,
22560 IX86_BUILTIN_CVTDQ2PS256,
22561 IX86_BUILTIN_CVTPD2PS256,
22562 IX86_BUILTIN_CVTPS2DQ256,
22563 IX86_BUILTIN_CVTPS2PD256,
22564 IX86_BUILTIN_CVTTPD2DQ256,
22565 IX86_BUILTIN_CVTPD2DQ256,
22566 IX86_BUILTIN_CVTTPS2DQ256,
22567 IX86_BUILTIN_EXTRACTF128PD256,
22568 IX86_BUILTIN_EXTRACTF128PS256,
22569 IX86_BUILTIN_EXTRACTF128SI256,
22570 IX86_BUILTIN_VZEROALL,
22571 IX86_BUILTIN_VZEROUPPER,
22572 IX86_BUILTIN_VPERMILVARPD,
22573 IX86_BUILTIN_VPERMILVARPS,
22574 IX86_BUILTIN_VPERMILVARPD256,
22575 IX86_BUILTIN_VPERMILVARPS256,
22576 IX86_BUILTIN_VPERMILPD,
22577 IX86_BUILTIN_VPERMILPS,
22578 IX86_BUILTIN_VPERMILPD256,
22579 IX86_BUILTIN_VPERMILPS256,
22580 IX86_BUILTIN_VPERMIL2PD,
22581 IX86_BUILTIN_VPERMIL2PS,
22582 IX86_BUILTIN_VPERMIL2PD256,
22583 IX86_BUILTIN_VPERMIL2PS256,
22584 IX86_BUILTIN_VPERM2F128PD256,
22585 IX86_BUILTIN_VPERM2F128PS256,
22586 IX86_BUILTIN_VPERM2F128SI256,
22587 IX86_BUILTIN_VBROADCASTSS,
22588 IX86_BUILTIN_VBROADCASTSD256,
22589 IX86_BUILTIN_VBROADCASTSS256,
22590 IX86_BUILTIN_VBROADCASTPD256,
22591 IX86_BUILTIN_VBROADCASTPS256,
22592 IX86_BUILTIN_VINSERTF128PD256,
22593 IX86_BUILTIN_VINSERTF128PS256,
22594 IX86_BUILTIN_VINSERTF128SI256,
22595 IX86_BUILTIN_LOADUPD256,
22596 IX86_BUILTIN_LOADUPS256,
22597 IX86_BUILTIN_STOREUPD256,
22598 IX86_BUILTIN_STOREUPS256,
22599 IX86_BUILTIN_LDDQU256,
22600 IX86_BUILTIN_MOVNTDQ256,
22601 IX86_BUILTIN_MOVNTPD256,
22602 IX86_BUILTIN_MOVNTPS256,
22603 IX86_BUILTIN_LOADDQU256,
22604 IX86_BUILTIN_STOREDQU256,
22605 IX86_BUILTIN_MASKLOADPD,
22606 IX86_BUILTIN_MASKLOADPS,
22607 IX86_BUILTIN_MASKSTOREPD,
22608 IX86_BUILTIN_MASKSTOREPS,
22609 IX86_BUILTIN_MASKLOADPD256,
22610 IX86_BUILTIN_MASKLOADPS256,
22611 IX86_BUILTIN_MASKSTOREPD256,
22612 IX86_BUILTIN_MASKSTOREPS256,
22613 IX86_BUILTIN_MOVSHDUP256,
22614 IX86_BUILTIN_MOVSLDUP256,
22615 IX86_BUILTIN_MOVDDUP256,
22616
22617 IX86_BUILTIN_SQRTPD256,
22618 IX86_BUILTIN_SQRTPS256,
22619 IX86_BUILTIN_SQRTPS_NR256,
22620 IX86_BUILTIN_RSQRTPS256,
22621 IX86_BUILTIN_RSQRTPS_NR256,
22622
22623 IX86_BUILTIN_RCPPS256,
22624
22625 IX86_BUILTIN_ROUNDPD256,
22626 IX86_BUILTIN_ROUNDPS256,
22627
22628 IX86_BUILTIN_UNPCKHPD256,
22629 IX86_BUILTIN_UNPCKLPD256,
22630 IX86_BUILTIN_UNPCKHPS256,
22631 IX86_BUILTIN_UNPCKLPS256,
22632
22633 IX86_BUILTIN_SI256_SI,
22634 IX86_BUILTIN_PS256_PS,
22635 IX86_BUILTIN_PD256_PD,
22636 IX86_BUILTIN_SI_SI256,
22637 IX86_BUILTIN_PS_PS256,
22638 IX86_BUILTIN_PD_PD256,
22639
22640 IX86_BUILTIN_VTESTZPD,
22641 IX86_BUILTIN_VTESTCPD,
22642 IX86_BUILTIN_VTESTNZCPD,
22643 IX86_BUILTIN_VTESTZPS,
22644 IX86_BUILTIN_VTESTCPS,
22645 IX86_BUILTIN_VTESTNZCPS,
22646 IX86_BUILTIN_VTESTZPD256,
22647 IX86_BUILTIN_VTESTCPD256,
22648 IX86_BUILTIN_VTESTNZCPD256,
22649 IX86_BUILTIN_VTESTZPS256,
22650 IX86_BUILTIN_VTESTCPS256,
22651 IX86_BUILTIN_VTESTNZCPS256,
22652 IX86_BUILTIN_PTESTZ256,
22653 IX86_BUILTIN_PTESTC256,
22654 IX86_BUILTIN_PTESTNZC256,
22655
22656 IX86_BUILTIN_MOVMSKPD256,
22657 IX86_BUILTIN_MOVMSKPS256,
22658
22659 /* TFmode support builtins. */
22660 IX86_BUILTIN_INFQ,
22661 IX86_BUILTIN_HUGE_VALQ,
22662 IX86_BUILTIN_FABSQ,
22663 IX86_BUILTIN_COPYSIGNQ,
22664
22665 /* Vectorizer support builtins. */
22666 IX86_BUILTIN_CPYSGNPS,
22667 IX86_BUILTIN_CPYSGNPD,
22668
22669 IX86_BUILTIN_CVTUDQ2PS,
22670
22671 IX86_BUILTIN_VEC_PERM_V2DF,
22672 IX86_BUILTIN_VEC_PERM_V4SF,
22673 IX86_BUILTIN_VEC_PERM_V2DI,
22674 IX86_BUILTIN_VEC_PERM_V4SI,
22675 IX86_BUILTIN_VEC_PERM_V8HI,
22676 IX86_BUILTIN_VEC_PERM_V16QI,
22677 IX86_BUILTIN_VEC_PERM_V2DI_U,
22678 IX86_BUILTIN_VEC_PERM_V4SI_U,
22679 IX86_BUILTIN_VEC_PERM_V8HI_U,
22680 IX86_BUILTIN_VEC_PERM_V16QI_U,
22681 IX86_BUILTIN_VEC_PERM_V4DF,
22682 IX86_BUILTIN_VEC_PERM_V8SF,
22683
22684 /* FMA4 and XOP instructions. */
22685 IX86_BUILTIN_VFMADDSS,
22686 IX86_BUILTIN_VFMADDSD,
22687 IX86_BUILTIN_VFMADDPS,
22688 IX86_BUILTIN_VFMADDPD,
22689 IX86_BUILTIN_VFMSUBSS,
22690 IX86_BUILTIN_VFMSUBSD,
22691 IX86_BUILTIN_VFMSUBPS,
22692 IX86_BUILTIN_VFMSUBPD,
22693 IX86_BUILTIN_VFMADDSUBPS,
22694 IX86_BUILTIN_VFMADDSUBPD,
22695 IX86_BUILTIN_VFMSUBADDPS,
22696 IX86_BUILTIN_VFMSUBADDPD,
22697 IX86_BUILTIN_VFNMADDSS,
22698 IX86_BUILTIN_VFNMADDSD,
22699 IX86_BUILTIN_VFNMADDPS,
22700 IX86_BUILTIN_VFNMADDPD,
22701 IX86_BUILTIN_VFNMSUBSS,
22702 IX86_BUILTIN_VFNMSUBSD,
22703 IX86_BUILTIN_VFNMSUBPS,
22704 IX86_BUILTIN_VFNMSUBPD,
22705 IX86_BUILTIN_VFMADDPS256,
22706 IX86_BUILTIN_VFMADDPD256,
22707 IX86_BUILTIN_VFMSUBPS256,
22708 IX86_BUILTIN_VFMSUBPD256,
22709 IX86_BUILTIN_VFMADDSUBPS256,
22710 IX86_BUILTIN_VFMADDSUBPD256,
22711 IX86_BUILTIN_VFMSUBADDPS256,
22712 IX86_BUILTIN_VFMSUBADDPD256,
22713 IX86_BUILTIN_VFNMADDPS256,
22714 IX86_BUILTIN_VFNMADDPD256,
22715 IX86_BUILTIN_VFNMSUBPS256,
22716 IX86_BUILTIN_VFNMSUBPD256,
22717
22718 IX86_BUILTIN_VPCMOV,
22719 IX86_BUILTIN_VPCMOV_V2DI,
22720 IX86_BUILTIN_VPCMOV_V4SI,
22721 IX86_BUILTIN_VPCMOV_V8HI,
22722 IX86_BUILTIN_VPCMOV_V16QI,
22723 IX86_BUILTIN_VPCMOV_V4SF,
22724 IX86_BUILTIN_VPCMOV_V2DF,
22725 IX86_BUILTIN_VPCMOV256,
22726 IX86_BUILTIN_VPCMOV_V4DI256,
22727 IX86_BUILTIN_VPCMOV_V8SI256,
22728 IX86_BUILTIN_VPCMOV_V16HI256,
22729 IX86_BUILTIN_VPCMOV_V32QI256,
22730 IX86_BUILTIN_VPCMOV_V8SF256,
22731 IX86_BUILTIN_VPCMOV_V4DF256,
22732
22733 IX86_BUILTIN_VPPERM,
22734
22735 IX86_BUILTIN_VPMACSSWW,
22736 IX86_BUILTIN_VPMACSWW,
22737 IX86_BUILTIN_VPMACSSWD,
22738 IX86_BUILTIN_VPMACSWD,
22739 IX86_BUILTIN_VPMACSSDD,
22740 IX86_BUILTIN_VPMACSDD,
22741 IX86_BUILTIN_VPMACSSDQL,
22742 IX86_BUILTIN_VPMACSSDQH,
22743 IX86_BUILTIN_VPMACSDQL,
22744 IX86_BUILTIN_VPMACSDQH,
22745 IX86_BUILTIN_VPMADCSSWD,
22746 IX86_BUILTIN_VPMADCSWD,
22747
22748 IX86_BUILTIN_VPHADDBW,
22749 IX86_BUILTIN_VPHADDBD,
22750 IX86_BUILTIN_VPHADDBQ,
22751 IX86_BUILTIN_VPHADDWD,
22752 IX86_BUILTIN_VPHADDWQ,
22753 IX86_BUILTIN_VPHADDDQ,
22754 IX86_BUILTIN_VPHADDUBW,
22755 IX86_BUILTIN_VPHADDUBD,
22756 IX86_BUILTIN_VPHADDUBQ,
22757 IX86_BUILTIN_VPHADDUWD,
22758 IX86_BUILTIN_VPHADDUWQ,
22759 IX86_BUILTIN_VPHADDUDQ,
22760 IX86_BUILTIN_VPHSUBBW,
22761 IX86_BUILTIN_VPHSUBWD,
22762 IX86_BUILTIN_VPHSUBDQ,
22763
22764 IX86_BUILTIN_VPROTB,
22765 IX86_BUILTIN_VPROTW,
22766 IX86_BUILTIN_VPROTD,
22767 IX86_BUILTIN_VPROTQ,
22768 IX86_BUILTIN_VPROTB_IMM,
22769 IX86_BUILTIN_VPROTW_IMM,
22770 IX86_BUILTIN_VPROTD_IMM,
22771 IX86_BUILTIN_VPROTQ_IMM,
22772
22773 IX86_BUILTIN_VPSHLB,
22774 IX86_BUILTIN_VPSHLW,
22775 IX86_BUILTIN_VPSHLD,
22776 IX86_BUILTIN_VPSHLQ,
22777 IX86_BUILTIN_VPSHAB,
22778 IX86_BUILTIN_VPSHAW,
22779 IX86_BUILTIN_VPSHAD,
22780 IX86_BUILTIN_VPSHAQ,
22781
22782 IX86_BUILTIN_VFRCZSS,
22783 IX86_BUILTIN_VFRCZSD,
22784 IX86_BUILTIN_VFRCZPS,
22785 IX86_BUILTIN_VFRCZPD,
22786 IX86_BUILTIN_VFRCZPS256,
22787 IX86_BUILTIN_VFRCZPD256,
22788
22789 IX86_BUILTIN_VPCOMEQUB,
22790 IX86_BUILTIN_VPCOMNEUB,
22791 IX86_BUILTIN_VPCOMLTUB,
22792 IX86_BUILTIN_VPCOMLEUB,
22793 IX86_BUILTIN_VPCOMGTUB,
22794 IX86_BUILTIN_VPCOMGEUB,
22795 IX86_BUILTIN_VPCOMFALSEUB,
22796 IX86_BUILTIN_VPCOMTRUEUB,
22797
22798 IX86_BUILTIN_VPCOMEQUW,
22799 IX86_BUILTIN_VPCOMNEUW,
22800 IX86_BUILTIN_VPCOMLTUW,
22801 IX86_BUILTIN_VPCOMLEUW,
22802 IX86_BUILTIN_VPCOMGTUW,
22803 IX86_BUILTIN_VPCOMGEUW,
22804 IX86_BUILTIN_VPCOMFALSEUW,
22805 IX86_BUILTIN_VPCOMTRUEUW,
22806
22807 IX86_BUILTIN_VPCOMEQUD,
22808 IX86_BUILTIN_VPCOMNEUD,
22809 IX86_BUILTIN_VPCOMLTUD,
22810 IX86_BUILTIN_VPCOMLEUD,
22811 IX86_BUILTIN_VPCOMGTUD,
22812 IX86_BUILTIN_VPCOMGEUD,
22813 IX86_BUILTIN_VPCOMFALSEUD,
22814 IX86_BUILTIN_VPCOMTRUEUD,
22815
22816 IX86_BUILTIN_VPCOMEQUQ,
22817 IX86_BUILTIN_VPCOMNEUQ,
22818 IX86_BUILTIN_VPCOMLTUQ,
22819 IX86_BUILTIN_VPCOMLEUQ,
22820 IX86_BUILTIN_VPCOMGTUQ,
22821 IX86_BUILTIN_VPCOMGEUQ,
22822 IX86_BUILTIN_VPCOMFALSEUQ,
22823 IX86_BUILTIN_VPCOMTRUEUQ,
22824
22825 IX86_BUILTIN_VPCOMEQB,
22826 IX86_BUILTIN_VPCOMNEB,
22827 IX86_BUILTIN_VPCOMLTB,
22828 IX86_BUILTIN_VPCOMLEB,
22829 IX86_BUILTIN_VPCOMGTB,
22830 IX86_BUILTIN_VPCOMGEB,
22831 IX86_BUILTIN_VPCOMFALSEB,
22832 IX86_BUILTIN_VPCOMTRUEB,
22833
22834 IX86_BUILTIN_VPCOMEQW,
22835 IX86_BUILTIN_VPCOMNEW,
22836 IX86_BUILTIN_VPCOMLTW,
22837 IX86_BUILTIN_VPCOMLEW,
22838 IX86_BUILTIN_VPCOMGTW,
22839 IX86_BUILTIN_VPCOMGEW,
22840 IX86_BUILTIN_VPCOMFALSEW,
22841 IX86_BUILTIN_VPCOMTRUEW,
22842
22843 IX86_BUILTIN_VPCOMEQD,
22844 IX86_BUILTIN_VPCOMNED,
22845 IX86_BUILTIN_VPCOMLTD,
22846 IX86_BUILTIN_VPCOMLED,
22847 IX86_BUILTIN_VPCOMGTD,
22848 IX86_BUILTIN_VPCOMGED,
22849 IX86_BUILTIN_VPCOMFALSED,
22850 IX86_BUILTIN_VPCOMTRUED,
22851
22852 IX86_BUILTIN_VPCOMEQQ,
22853 IX86_BUILTIN_VPCOMNEQ,
22854 IX86_BUILTIN_VPCOMLTQ,
22855 IX86_BUILTIN_VPCOMLEQ,
22856 IX86_BUILTIN_VPCOMGTQ,
22857 IX86_BUILTIN_VPCOMGEQ,
22858 IX86_BUILTIN_VPCOMFALSEQ,
22859 IX86_BUILTIN_VPCOMTRUEQ,
22860
22861 /* LWP instructions. */
22862 IX86_BUILTIN_LLWPCB,
22863 IX86_BUILTIN_SLWPCB,
22864 IX86_BUILTIN_LWPVAL32,
22865 IX86_BUILTIN_LWPVAL64,
22866 IX86_BUILTIN_LWPINS32,
22867 IX86_BUILTIN_LWPINS64,
22868
22869 IX86_BUILTIN_CLZS,
22870
22871 /* FSGSBASE instructions. */
22872 IX86_BUILTIN_RDFSBASE32,
22873 IX86_BUILTIN_RDFSBASE64,
22874 IX86_BUILTIN_RDGSBASE32,
22875 IX86_BUILTIN_RDGSBASE64,
22876 IX86_BUILTIN_WRFSBASE32,
22877 IX86_BUILTIN_WRFSBASE64,
22878 IX86_BUILTIN_WRGSBASE32,
22879 IX86_BUILTIN_WRGSBASE64,
22880
22881 /* RDRND instructions. */
22882 IX86_BUILTIN_RDRAND16,
22883 IX86_BUILTIN_RDRAND32,
22884 IX86_BUILTIN_RDRAND64,
22885
22886 /* F16C instructions. */
22887 IX86_BUILTIN_CVTPH2PS,
22888 IX86_BUILTIN_CVTPH2PS256,
22889 IX86_BUILTIN_CVTPS2PH,
22890 IX86_BUILTIN_CVTPS2PH256,
22891
22892 IX86_BUILTIN_MAX
22893 };
22894
22895 /* Table for the ix86 builtin decls. */
22896 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
22897
22898 /* Table of all the builtin functions that are possible with different ISAs
22899 but are waiting to be built until a function is declared to use that
22900 ISA. */
22901 struct builtin_isa {
22902 const char *name; /* function name */
22903 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
22904 int isa; /* isa_flags this builtin is defined for */
22905 bool const_p; /* true if the declaration is constant */
22906 bool set_and_not_built_p; /* true if recorded here but the decl has not been built yet */
22907 };
22908
22909 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
22910
22911
22912 /* Add an ix86 target builtin function with CODE, NAME and TCODE.  Save MASK,
22913 the isa_flags the builtin is defined for, in the ix86_builtins_isa array.
22914 Store the function decl in the ix86_builtins array.  Return the function
22915 decl, or NULL_TREE if the builtin was not added.
22916
22917 If the front end has a special hook for builtin functions, delay adding
22918 builtin functions that aren't in the current ISA until the ISA is changed
22919 with function specific optimization.  Doing so can save about 300K for the
22920 default compiler.  When the builtin is expanded, check at that time whether
22921 it is valid.
22922
22923 If the front end doesn't have a special hook, record all builtins anyway,
22924 even those whose instruction set is not in the current ISA, in case the
22925 user selects a different ISA with function specific options; this avoids
22926 scope errors if a builtin is added in the middle of a function scope. */
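/* For example, for a front end whose builtin_function hook differs from
   builtin_function_ext_scope, an SSE4.1 builtin such as
   __builtin_ia32_movntdqa compiled with only -msse2 is merely recorded
   in ix86_builtins_isa below; ix86_add_new_builtins builds its decl
   later if SSE4.1 is ever enabled, e.g. by function specific options. */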
22927
22928 static inline tree
22929 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
22930 enum ix86_builtins code)
22931 {
22932 tree decl = NULL_TREE;
22933
22934 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
22935 {
22936 ix86_builtins_isa[(int) code].isa = mask;
22937
22938 mask &= ~OPTION_MASK_ISA_64BIT;
22939 if (mask == 0
22940 || (mask & ix86_isa_flags) != 0
22941 || (lang_hooks.builtin_function
22942 == lang_hooks.builtin_function_ext_scope))
22944 {
22945 tree type = ix86_get_builtin_func_type (tcode);
22946 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
22947 NULL, NULL_TREE);
22948 ix86_builtins[(int) code] = decl;
22949 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
22950 }
22951 else
22952 {
22953 ix86_builtins[(int) code] = NULL_TREE;
22954 ix86_builtins_isa[(int) code].tcode = tcode;
22955 ix86_builtins_isa[(int) code].name = name;
22956 ix86_builtins_isa[(int) code].const_p = false;
22957 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
22958 }
22959 }
22960
22961 return decl;
22962 }
22963
22964 /* Like def_builtin, but also marks the function decl "const". */
22965
22966 static inline tree
22967 def_builtin_const (int mask, const char *name,
22968 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
22969 {
22970 tree decl = def_builtin (mask, name, tcode, code);
22971 if (decl)
22972 TREE_READONLY (decl) = 1;
22973 else
22974 ix86_builtins_isa[(int) code].const_p = true;
22975
22976 return decl;
22977 }
22978
22979 /* Add any new builtin functions for a given ISA that may not have been
22980 declared yet.  This saves a bit of space compared to adding every
22981 declaration to the tree up front, whether or not it is ever used. */
22982
22983 static void
22984 ix86_add_new_builtins (int isa)
22985 {
22986 int i;
22987
22988 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
22989 {
22990 if ((ix86_builtins_isa[i].isa & isa) != 0
22991 && ix86_builtins_isa[i].set_and_not_built_p)
22992 {
22993 tree decl, type;
22994
22995 /* Don't define the builtin again. */
22996 ix86_builtins_isa[i].set_and_not_built_p = false;
22997
22998 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
22999 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
23000 type, i, BUILT_IN_MD, NULL,
23001 NULL_TREE);
23002
23003 ix86_builtins[i] = decl;
23004 if (ix86_builtins_isa[i].const_p)
23005 TREE_READONLY (decl) = 1;
23006 }
23007 }
23008 }
23009
23010 /* Bits for builtin_description.flag. */
23011
23012 /* Set when we don't support the comparison natively and must swap the
23013 comparison operands in order to support it. */
23014 #define BUILTIN_DESC_SWAP_OPERANDS 1
23015
23016 struct builtin_description
23017 {
23018 const unsigned int mask;
23019 const enum insn_code icode;
23020 const char *const name;
23021 const enum ix86_builtins code;
23022 const enum rtx_code comparison;
23023 const int flag;
23024 };
23025
23026 static const struct builtin_description bdesc_comi[] =
23027 {
23028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
23029 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
23030 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
23031 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
23032 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
23033 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
23034 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
23035 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
23036 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
23037 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
23038 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
23039 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
23040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
23041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
23042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
23043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
23044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
23045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
23046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
23047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
23048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
23049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
23050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
23051 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
23052 };
23053
23054 static const struct builtin_description bdesc_pcmpestr[] =
23055 {
23056 /* SSE4.2 */
23057 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
23058 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
23059 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
23060 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
23061 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
23062 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
23063 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
23064 };
23065
23066 static const struct builtin_description bdesc_pcmpistr[] =
23067 {
23068 /* SSE4.2 */
23069 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
23070 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
23071 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
23072 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
23073 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
23074 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
23075 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
23076 };
23077
23078 /* Special builtins with variable number of arguments. */
23079 static const struct builtin_description bdesc_special_args[] =
23080 {
23081 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
23082 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
23083
23084 /* MMX */
23085 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
23086
23087 /* 3DNow! */
23088 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
23089
23090 /* SSE */
23091 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
23092 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
23093 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
23094
23095 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
23096 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
23097 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
23098 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
23099
23100 /* SSE or 3DNow!A */
23101 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
23102 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
23103
23104 /* SSE2 */
23105 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
23106 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
23107 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
23108 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
23109 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
23110 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
23111 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
23112 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
23113 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
23114
23115 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
23116 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
23117
23118 /* SSE3 */
23119 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
23120
23121 /* SSE4.1 */
23122 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
23123
23124 /* SSE4A */
23125 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
23126 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
23127
23128 /* AVX */
23129 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
23130 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
23131
23132 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
23133 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
23134 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
23135 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
23136 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
23137
23138 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
23139 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
23140 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
23141 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
23142 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
23143 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
23144 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
23145
23146 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
23147 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
23148 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
23149
23150 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
23151 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
23152 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
23153 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
23154 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
23155 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
23156 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
23157 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
23158
23159 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
23160 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
23161 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
23162 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
23163 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
23164 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
23165
23166 /* FSGSBASE */
23167 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
23168 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
23169 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
23170 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
23171 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
23172 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
23173 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
23174 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
23175
23176 /* RDRND */
23177 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
23178 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
23179 { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
23180 };
23181
23182 /* Builtins with variable number of arguments. */
23183 static const struct builtin_description bdesc_args[] =
23184 {
23185 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
23186 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
23187 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
23188 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
23189 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
23190 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
23191 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
23192
23193 /* MMX */
23194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23199 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23200
23201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23203 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23206 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23209
23210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23212
23213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23215 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23217
23218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23224
23225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
23230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
23231
23232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
23233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
23234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
23235
23236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
23237
23238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
23241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
23244
23245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23247 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
23248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23249 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
23250 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
23251
23252 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
23253 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
23254 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
23255 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
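/* Note: each MMX shift builtin above appears twice with the same insn
   code.  The ..._SI_COUNT function types take the shift count as a
   plain integer, while the ..._V*_COUNT types take it in an MMX
   register; the _COUNT suffix tells the argument expander to treat the
   last operand as a shift count.  The 128-bit SSE2 shifts further down
   follow the same convention.  */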
23256
23257 /* 3DNow! */
23258 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
23259 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
23260 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23261 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23262
23263 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23264 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23265 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23266 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23267 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23268 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23269 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23270 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23271 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23272 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23273 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23274 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23275 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23276 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23277 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23278
23279 /* 3DNow!A */
23280 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
23281 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
23282 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
23283 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23284 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23285 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23286
23287 /* SSE */
23288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
23289 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23290 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23291 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23292 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23293 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23294 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
23295 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
23296 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
23297 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
23298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
23299 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
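/* The entries or'ing in OPTION_MASK_ISA_64BIT are only available on
   64-bit targets, since the DImode scalar conversions need 64-bit
   general registers.  */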
23300
23301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23302
23303 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23304 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23305 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23311
23312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
23313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
23314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
23315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
23318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
23319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
23320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
23321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23323 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
23324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
23325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
23326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
23327 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
23328 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
23329 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
23330 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
23331 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23332 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
23333 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
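/* All of the comparison builtins above share a single (vm)maskcmp
   pattern; the rtx_code column (EQ, LT, LE, UNORDERED, ...) selects the
   condition that is generated, and the _SWAP suffix on the function
   type makes the expander swap the two inputs, which is how the
   cmpgt/cmpge variants are expressed in terms of LT/LE.  */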
23334
23335 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23336 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23337 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23339
23340 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23341 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23342 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23343 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23344
23345 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23346
23347 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23348 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23350 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23351 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23352
23353 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
23354 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
23355 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
23356
23357 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
23358
23359 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
23360 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
23361 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
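/* The _VEC_MERGE function types mark the one-operand scalar forms
   (sqrtss, rsqrtss, rcpss): the operation is applied to element 0 and
   the upper elements are copied from the input, so the single argument
   serves both as the operand and as the merge source of the vm
   pattern.  */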
23362
23363 /* MMX extensions available with either SSE or 3DNow!A */
23364 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23365 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23366 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23367
23368 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23369 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23370 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23371 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23372
23373 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
23374 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
23375
23376 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
23377
23378 /* SSE2 */
23379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23380
23381 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
23382 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
23383 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
23384 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
23385 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
23386 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
23387 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
23388 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
23389 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
23390 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
23391 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
23392 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
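/* The vec_perm entries use CODE_FOR_nothing: there is no single insn
   pattern behind them, so this table only supplies their names and
   function types; the actual expansion is handled by the dedicated
   vec_perm code elsewhere in this file.  */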
23393
23394 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
23395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
23396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
23397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
23398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
23399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
23400
23401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
23402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
23403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
23404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
23405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
23406
23407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
23408
23409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
23410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
23411 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
23412 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
23413
23414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
23415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
23416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
23417
23418 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23419 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23420 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23421 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23426
23427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
23428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
23429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
23430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
23431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
23432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
23433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
23434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
23435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
23436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
23437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
23438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
23439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
23440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
23441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
23442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
23443 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
23444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
23445 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
23446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
23447
23448 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23449 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23452
23453 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23455 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23456 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23457
23458 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23459
23460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23461 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23462 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23463
23464 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
23465
23466 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23467 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23468 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23469 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23470 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23471 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23472 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23473 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23474
23475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23483
23484 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23485 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23486
23487 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23489 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23490 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23491
23492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23494
23495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23501
23502 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23503 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23504 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23506
23507 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23508 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23509 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23510 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23511 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23512 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23513 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23514 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23515
23516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
23517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
23518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
23519
23520 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
23522
23523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
23524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
23525
23526 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
23527
23528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
23529 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
23530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
23531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
23532
23533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
23534 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
23535 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
23536 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
23537 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
23538 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
23539 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
23540
23541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
23542 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
23543 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
23544 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
23545 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
23546 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
23547 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
23548
23549 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
23550 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
23551 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
23552 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
23553
23554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
23555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
23556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
23557
23558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
23559
23560 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
23561 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
23562
23563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
23564
23565 /* SSE2 MMX */
23566 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
23567 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
23568
23569 /* SSE3 */
23570 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23571 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
23572
23573 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23574 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23575 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23576 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23577 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
23578 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23579
23580 /* SSSE3 */
23581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
23582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
23583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
23584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
23585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
23586 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
23587
23588 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23589 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23590 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23591 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23592 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23593 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23594 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23595 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23596 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23597 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23598 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23599 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23600 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
23601 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
23602 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23603 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23604 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23605 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23606 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23607 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23608 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23609 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23610 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23611 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
23612
23613 /* SSSE3. */
23614 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
23615 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
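/* The _INT_CONVERT function types (palignr here, pslldqi/psrldqi
   above) indicate that the insn pattern operates in a different
   integer mode than the builtin prototype, so the vector operands are
   converted during expansion; the final shift/align amount must be an
   immediate.  */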
23616
23617 /* SSE4.1 */
23618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
23621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
23622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
23626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
23627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
23628
23629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
23630 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
23631 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
23632 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
23633 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
23634 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
23635 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
23636 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
23637 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
23638 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
23639 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
23640 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
23641 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
23642
23643 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
23644 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23645 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23646 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23647 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23648 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23649 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23650 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23651 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23652 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23653 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
23654 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23655
23656 /* SSE4.1 */
23657 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
23658 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
23659 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23660 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23661
23662 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
23663 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
23664 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
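/* The three ptest builtins share one pattern; the rtx_code column
   selects the flag the result is read from: EQ reads ZF (ptestz), LTU
   reads CF (ptestc), and GTU tests that neither flag is set
   (ptestnzc).  */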
23665
23666 /* SSE4.2 */
23667 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23668 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
23669 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
23670 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
23671 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
23672
23673 /* SSE4A */
23674 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
23675 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
23676 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
23677 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23678
23679 /* AES */
23680 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
23681 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
23682
23683 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23684 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23685 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23686 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23687
23688 /* PCLMUL */
23689 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
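/* Entries with a null name (the __float128 abs/copysign, AES and
   PCLMUL ones) are skipped when builtins are registered by name; they
   only supply expansion information, and the functions themselves are
   declared separately so that an ISA requirement a single mask cannot
   express (e.g. AES together with SSE2) can be checked there.  */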
23690
23691 /* AVX */
23692 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23693 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23696 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23697 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23700 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23706 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23707 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23708 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23709 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23710 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23711 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23712 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23713 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23714 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23715 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23716 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23717 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23718
23719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
23720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
23721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
23722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
23723
23724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
23727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
23728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
23734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
23735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
23738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
23739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
23740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
23741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
23742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
23743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
23744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
23745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
23746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
23747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
23748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
23749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
23750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
23751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
23752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
23753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
23754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
23755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
23756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
23757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
23758
23759 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23760 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
23762
23763 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
23764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23765 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23767 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23768
23769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
23770
23771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
23772 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
23773
23774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
23776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
23778
23779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
23780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
23781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
23782 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
23783 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
23784 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
23785
23786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
23787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
23788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
23789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
23790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
23791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
23792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
23793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
23794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
23795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23801
23802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
23803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
23804
23805 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
23806
23807 /* F16C */
23808 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
23809 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
23810 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
23811 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
23812 };
23813
23814 /* FMA4 and XOP. */
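/* Note (added summary of the defines below): the MULTI_ARG_* macros are
   shorthand for the ix86_builtin_func_type values used by the
   bdesc_multi_arg table; the leading digit is the number of arguments,
   the remaining suffixes name the element modes, _IMM marks a trailing
   immediate operand, _CMP an entry whose rtx comparison code is taken
   from the table, and _TF a pcom true/false form.  */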
23815 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
23816 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
23817 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
23818 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
23819 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
23820 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
23821 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
23822 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
23823 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
23824 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
23825 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
23826 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
23827 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
23828 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
23829 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
23830 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
23831 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
23832 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
23833 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
23834 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
23835 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
23836 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
23837 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
23838 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
23839 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
23840 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
23841 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
23842 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
23843 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
23844 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
23845 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
23846 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
23847 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
23848 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
23849 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
23850 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
23851 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
23852 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
23853 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
23854 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
23855 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
23856 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
23857 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
23858 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
23859 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
23860 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
23861 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
23862 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
23863 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
23864 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
23865 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
23866 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
23867
23868 static const struct builtin_description bdesc_multi_arg[] =
23869 {
23870 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23871 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23872 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23873 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23874 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23875 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23876 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23877 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23878
23879 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23880 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23881 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23882 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23883 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
23884 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
23885 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23886 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23887
23888 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23889 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23890 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
23891 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
23892
23893 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23894 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23895 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23896 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23897
23898 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23899 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23900 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23901 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23902
23903 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23904 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23905 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23906 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23907
23908 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
23909 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
23910 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
23911 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
23912 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
23913 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
23914 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
23915
23916 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
23917 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
23918 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
23919 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
23920 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
23921 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
23922 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
23923
23924 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
23925
23926 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
23927 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
23928 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23929 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23930 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
23931 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
23932 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23933 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23934 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23935 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
23936 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23937 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
23938
23939 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23940 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
23941 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
23942 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
23943 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
23944 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
23945 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
23946 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
23947 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23948 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
23949 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
23950 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
23951 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23952 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
23953 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
23954 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
23955
23956 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
23957 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
23958 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
23959 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
23960 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
23961 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
23962
23963 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23964 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23965 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23966 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23967 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23968 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23969 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23970 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23971 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23972 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23973 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23974 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23975 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23976 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23977 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23978
23979 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
23980 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23981 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23982 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
23983 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
23984 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
23985 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
23986
23987 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
23988 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23989 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23990 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
23991 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
23992 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
23993 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
23994
23995 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
23996 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23997 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23998 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
23999 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
24000 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
24001 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
24002
24003 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
24004 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
24005 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
24006 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
24007 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
24008 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
24009 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
24010
24011 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
24012 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
24013 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
24014 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
24015 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
24016 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
24017 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
24018
24019 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
24020 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
24021 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
24022 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
24023 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
24024 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
24025 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
24026
24027 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
24028 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
24029 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
24030 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
24031 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
24032 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
24033 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
24034
24035 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
24036 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
24037 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
24038 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
24039 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
24040 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
24041 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
24042
24043 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
24044 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
24045 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
24046 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
24047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
24048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
24049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
24050 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
24051
24052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
24053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
24054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
24055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
24056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
24057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
24058 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
24059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
24060
24061 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
24062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
24063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
24064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
24065
24066 };
24067
24068 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
24069    not in the current target ISA, so that the user can compile particular
24070    modules with target-specific options that differ from the command-line
24071    options.  */
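/* For instance (an illustrative sketch only; the type and function names
   below are made up), a translation unit compiled with plain -msse2 may
   still contain

	typedef float v8sf __attribute__ ((vector_size (32)));

	__attribute__ ((target ("avx"))) v8sf
	add256 (v8sf a, v8sf b)
	{
	  return __builtin_ia32_addps256 (a, b);
	}

   so the AVX builtins registered below must exist even though AVX was not
   enabled on the command line.  */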
24072 static void
24073 ix86_init_mmx_sse_builtins (void)
24074 {
24075 const struct builtin_description * d;
24076 enum ix86_builtin_func_type ftype;
24077 size_t i;
24078
24079 /* Add all special builtins with a variable number of operands. */
24080 for (i = 0, d = bdesc_special_args;
24081 i < ARRAY_SIZE (bdesc_special_args);
24082 i++, d++)
24083 {
24084 if (d->name == 0)
24085 continue;
24086
24087 ftype = (enum ix86_builtin_func_type) d->flag;
24088 def_builtin (d->mask, d->name, ftype, d->code);
24089 }
24090
24091 /* Add all builtins with a variable number of operands. */
24092 for (i = 0, d = bdesc_args;
24093 i < ARRAY_SIZE (bdesc_args);
24094 i++, d++)
24095 {
24096 if (d->name == 0)
24097 continue;
24098
24099 ftype = (enum ix86_builtin_func_type) d->flag;
24100 def_builtin_const (d->mask, d->name, ftype, d->code);
24101 }
24102
24103 /* pcmpestr[im] insns. */
24104 for (i = 0, d = bdesc_pcmpestr;
24105 i < ARRAY_SIZE (bdesc_pcmpestr);
24106 i++, d++)
24107 {
24108 if (d->code == IX86_BUILTIN_PCMPESTRM128)
24109 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
24110 else
24111 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
24112 def_builtin_const (d->mask, d->name, ftype, d->code);
24113 }
24114
24115 /* pcmpistr[im] insns. */
24116 for (i = 0, d = bdesc_pcmpistr;
24117 i < ARRAY_SIZE (bdesc_pcmpistr);
24118 i++, d++)
24119 {
24120 if (d->code == IX86_BUILTIN_PCMPISTRM128)
24121 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
24122 else
24123 ftype = INT_FTYPE_V16QI_V16QI_INT;
24124 def_builtin_const (d->mask, d->name, ftype, d->code);
24125 }
24126
24127 /* comi/ucomi insns. */
24128 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24129 {
24130 if (d->mask == OPTION_MASK_ISA_SSE2)
24131 ftype = INT_FTYPE_V2DF_V2DF;
24132 else
24133 ftype = INT_FTYPE_V4SF_V4SF;
24134 def_builtin_const (d->mask, d->name, ftype, d->code);
24135 }
24136
24137 /* SSE */
24138 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
24139 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
24140 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
24141 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
24142
24143 /* SSE or 3DNow!A */
24144 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24145 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
24146 IX86_BUILTIN_MASKMOVQ);
24147
24148 /* SSE2 */
24149 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
24150 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
24151
24152 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
24153 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
24154 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
24155 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
24156
24157 /* SSE3. */
24158 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
24159 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
24160 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
24161 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
24162
24163 /* AES */
24164 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
24165 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
24166 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
24167 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
24168 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
24169 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
24170 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
24171 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
24172 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
24173 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
24174 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
24175 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
24176
24177 /* PCLMUL */
24178 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
24179 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
24180
24181 /* MMX access to the vec_init patterns. */
24182 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
24183 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
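/* Illustrative use: __builtin_ia32_vec_init_v2si (a, b) constructs a
   2 x SImode MMX value from two ints.  */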
24184
24185 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
24186 V4HI_FTYPE_HI_HI_HI_HI,
24187 IX86_BUILTIN_VEC_INIT_V4HI);
24188
24189 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
24190 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
24191 IX86_BUILTIN_VEC_INIT_V8QI);
24192
24193 /* Access to the vec_extract patterns. */
24194 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
24195 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
24196 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
24197 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
24198 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
24199 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
24200 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
24201 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
24202 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
24203 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
24204
24205 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24206 "__builtin_ia32_vec_ext_v4hi",
24207 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
24208
24209 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
24210 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
24211
24212 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
24213 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
24214
24215 /* Access to the vec_set patterns. */
24216 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
24217 "__builtin_ia32_vec_set_v2di",
24218 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
24219
24220 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
24221 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
24222
24223 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
24224 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
24225
24226 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
24227 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
24228
24229 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
24230 "__builtin_ia32_vec_set_v4hi",
24231 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
24232
24233 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
24234 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
24235
24236 /* Add the FMA4 and XOP multi-argument instructions.  */
24237 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24238 {
24239 if (d->name == 0)
24240 continue;
24241
24242 ftype = (enum ix86_builtin_func_type) d->flag;
24243 def_builtin_const (d->mask, d->name, ftype, d->code);
24244 }
24245 }
24246
24247 /* Internal helper for ix86_init_builtins: register the __builtin_ms_va_*
and __builtin_sysv_va_* variants of va_start/va_end/va_copy used with the
ms_abi and sysv_abi attributes (64-bit only).  */
24248
24249 static void
24250 ix86_init_builtins_va_builtins_abi (void)
24251 {
24252 tree ms_va_ref, sysv_va_ref;
24253 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
24254 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
24255 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
24256 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
24257
24258 if (!TARGET_64BIT)
24259 return;
24260 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
24261 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
24262 ms_va_ref = build_reference_type (ms_va_list_type_node);
24263 sysv_va_ref =
24264 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
24265
24266 fnvoid_va_end_ms =
24267 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24268 fnvoid_va_start_ms =
24269 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24270 fnvoid_va_end_sysv =
24271 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
24272 fnvoid_va_start_sysv =
24273 build_varargs_function_type_list (void_type_node, sysv_va_ref,
24274 NULL_TREE);
24275 fnvoid_va_copy_ms =
24276 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
24277 NULL_TREE);
24278 fnvoid_va_copy_sysv =
24279 build_function_type_list (void_type_node, sysv_va_ref,
24280 sysv_va_ref, NULL_TREE);
24281
24282 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
24283 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
24284 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
24285 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
24286 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
24287 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
24288 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
24289 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24290 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
24291 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24292 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
24293 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24294 }
24295
24296 static void
24297 ix86_init_builtin_types (void)
24298 {
24299 tree float128_type_node, float80_type_node;
24300
24301 /* The __float80 type. */
24302 float80_type_node = long_double_type_node;
24303 if (TYPE_MODE (float80_type_node) != XFmode)
24304 {
24305 /* long double is not XFmode; build a separate 80-bit type.  */
24306 float80_type_node = make_node (REAL_TYPE);
24307
24308 TYPE_PRECISION (float80_type_node) = 80;
24309 layout_type (float80_type_node);
24310 }
24311 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
24312
24313 /* The __float128 type. */
24314 float128_type_node = make_node (REAL_TYPE);
24315 TYPE_PRECISION (float128_type_node) = 128;
24316 layout_type (float128_type_node);
24317 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
24318
24319 /* This macro is built by i386-builtin-types.awk. */
24320 DEFINE_BUILTIN_PRIMITIVE_TYPES;
24321 }
24322
24323 static void
24324 ix86_init_builtins (void)
24325 {
24326 tree t;
24327
24328 ix86_init_builtin_types ();
24329
24330 /* TFmode support builtins. */
24331 def_builtin_const (0, "__builtin_infq",
24332 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
24333 def_builtin_const (0, "__builtin_huge_valq",
24334 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
24335
24336 /* We will expand them to a normal call if SSE2 isn't available, since
24337 they are used by libgcc. */
24338 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
24339 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
24340 BUILT_IN_MD, "__fabstf2", NULL_TREE);
24341 TREE_READONLY (t) = 1;
24342 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
24343
24344 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
24345 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
24346 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
24347 TREE_READONLY (t) = 1;
24348 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
24349
24350 ix86_init_mmx_sse_builtins ();
24351
24352 if (TARGET_64BIT)
24353 ix86_init_builtins_va_builtins_abi ();
24354 }
24355
24356 /* Return the ix86 builtin for CODE. */
24357
24358 static tree
24359 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24360 {
24361 if (code >= IX86_BUILTIN_MAX)
24362 return error_mark_node;
24363
24364 return ix86_builtins[code];
24365 }
24366
24367 /* Errors in the source file can cause expand_expr to return const0_rtx
24368 where we expect a vector. To avoid crashing, use one of the vector
24369 clear instructions. */
24370 static rtx
24371 safe_vector_operand (rtx x, enum machine_mode mode)
24372 {
24373 if (x == const0_rtx)
24374 x = CONST0_RTX (mode);
24375 return x;
24376 }
24377
24378 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
24379
24380 static rtx
24381 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
24382 {
24383 rtx pat;
24384 tree arg0 = CALL_EXPR_ARG (exp, 0);
24385 tree arg1 = CALL_EXPR_ARG (exp, 1);
24386 rtx op0 = expand_normal (arg0);
24387 rtx op1 = expand_normal (arg1);
24388 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24389 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24390 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24391
24392 if (VECTOR_MODE_P (mode0))
24393 op0 = safe_vector_operand (op0, mode0);
24394 if (VECTOR_MODE_P (mode1))
24395 op1 = safe_vector_operand (op1, mode1);
24396
24397 if (optimize || !target
24398 || GET_MODE (target) != tmode
24399 || !insn_data[icode].operand[0].predicate (target, tmode))
24400 target = gen_reg_rtx (tmode);
24401
24402 if (GET_MODE (op1) == SImode && mode1 == TImode)
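/* A 32-bit scalar argument feeding a TImode operand is first loaded into
   the low element of an SSE register and then viewed as TImode.  */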
24403 {
24404 rtx x = gen_reg_rtx (V4SImode);
24405 emit_insn (gen_sse2_loadd (x, op1));
24406 op1 = gen_lowpart (TImode, x);
24407 }
24408
24409 if (!insn_data[icode].operand[1].predicate (op0, mode0))
24410 op0 = copy_to_mode_reg (mode0, op0);
24411 if (!insn_data[icode].operand[2].predicate (op1, mode1))
24412 op1 = copy_to_mode_reg (mode1, op1);
24413
24414 pat = GEN_FCN (icode) (target, op0, op1);
24415 if (! pat)
24416 return 0;
24417
24418 emit_insn (pat);
24419
24420 return target;
24421 }
24422
24423 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
24424
24425 static rtx
24426 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
24427 enum ix86_builtin_func_type m_type,
24428 enum rtx_code sub_code)
24429 {
24430 rtx pat;
24431 int i;
24432 int nargs;
24433 bool comparison_p = false;
24434 bool tf_p = false;
24435 bool last_arg_constant = false;
24436 int num_memory = 0;
24437 struct {
24438 rtx op;
24439 enum machine_mode mode;
24440 } args[4];
24441
24442 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24443
24444 switch (m_type)
24445 {
24446 case MULTI_ARG_4_DF2_DI_I:
24447 case MULTI_ARG_4_DF2_DI_I1:
24448 case MULTI_ARG_4_SF2_SI_I:
24449 case MULTI_ARG_4_SF2_SI_I1:
24450 nargs = 4;
24451 last_arg_constant = true;
24452 break;
24453
24454 case MULTI_ARG_3_SF:
24455 case MULTI_ARG_3_DF:
24456 case MULTI_ARG_3_SF2:
24457 case MULTI_ARG_3_DF2:
24458 case MULTI_ARG_3_DI:
24459 case MULTI_ARG_3_SI:
24460 case MULTI_ARG_3_SI_DI:
24461 case MULTI_ARG_3_HI:
24462 case MULTI_ARG_3_HI_SI:
24463 case MULTI_ARG_3_QI:
24464 case MULTI_ARG_3_DI2:
24465 case MULTI_ARG_3_SI2:
24466 case MULTI_ARG_3_HI2:
24467 case MULTI_ARG_3_QI2:
24468 nargs = 3;
24469 break;
24470
24471 case MULTI_ARG_2_SF:
24472 case MULTI_ARG_2_DF:
24473 case MULTI_ARG_2_DI:
24474 case MULTI_ARG_2_SI:
24475 case MULTI_ARG_2_HI:
24476 case MULTI_ARG_2_QI:
24477 nargs = 2;
24478 break;
24479
24480 case MULTI_ARG_2_DI_IMM:
24481 case MULTI_ARG_2_SI_IMM:
24482 case MULTI_ARG_2_HI_IMM:
24483 case MULTI_ARG_2_QI_IMM:
24484 nargs = 2;
24485 last_arg_constant = true;
24486 break;
24487
24488 case MULTI_ARG_1_SF:
24489 case MULTI_ARG_1_DF:
24490 case MULTI_ARG_1_SF2:
24491 case MULTI_ARG_1_DF2:
24492 case MULTI_ARG_1_DI:
24493 case MULTI_ARG_1_SI:
24494 case MULTI_ARG_1_HI:
24495 case MULTI_ARG_1_QI:
24496 case MULTI_ARG_1_SI_DI:
24497 case MULTI_ARG_1_HI_DI:
24498 case MULTI_ARG_1_HI_SI:
24499 case MULTI_ARG_1_QI_DI:
24500 case MULTI_ARG_1_QI_SI:
24501 case MULTI_ARG_1_QI_HI:
24502 nargs = 1;
24503 break;
24504
24505 case MULTI_ARG_2_DI_CMP:
24506 case MULTI_ARG_2_SI_CMP:
24507 case MULTI_ARG_2_HI_CMP:
24508 case MULTI_ARG_2_QI_CMP:
24509 nargs = 2;
24510 comparison_p = true;
24511 break;
24512
24513 case MULTI_ARG_2_SF_TF:
24514 case MULTI_ARG_2_DF_TF:
24515 case MULTI_ARG_2_DI_TF:
24516 case MULTI_ARG_2_SI_TF:
24517 case MULTI_ARG_2_HI_TF:
24518 case MULTI_ARG_2_QI_TF:
24519 nargs = 2;
24520 tf_p = true;
24521 break;
24522
24523 default:
24524 gcc_unreachable ();
24525 }
24526
24527 if (optimize || !target
24528 || GET_MODE (target) != tmode
24529 || !insn_data[icode].operand[0].predicate (target, tmode))
24530 target = gen_reg_rtx (tmode);
24531
24532 gcc_assert (nargs <= 4);
24533
24534 for (i = 0; i < nargs; i++)
24535 {
24536 tree arg = CALL_EXPR_ARG (exp, i);
24537 rtx op = expand_normal (arg);
24538 int adjust = (comparison_p) ? 1 : 0;
24539 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24540
24541 if (last_arg_constant && i == nargs-1)
24542 {
24543 if (!CONST_INT_P (op))
24544 {
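/* Diagnose the bad call and hand back a scratch register so that
   expansion can continue after the error.  */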
24545 error ("last argument must be an immediate");
24546 return gen_reg_rtx (tmode);
24547 }
24548 }
24549 else
24550 {
24551 if (VECTOR_MODE_P (mode))
24552 op = safe_vector_operand (op, mode);
24553
24554 /* If we aren't optimizing, only allow one memory operand to be
24555 generated. */
24556 if (memory_operand (op, mode))
24557 num_memory++;
24558
24559 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24560
24561 if (optimize
24562 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
24563 || num_memory > 1)
24564 op = force_reg (mode, op);
24565 }
24566
24567 args[i].op = op;
24568 args[i].mode = mode;
24569 }
24570
24571 switch (nargs)
24572 {
24573 case 1:
24574 pat = GEN_FCN (icode) (target, args[0].op);
24575 break;
24576
24577 case 2:
24578 if (tf_p)
24579 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24580 GEN_INT ((int)sub_code));
24581 else if (! comparison_p)
24582 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24583 else
24584 {
24585 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24586 args[0].op,
24587 args[1].op);
24588
24589 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24590 }
24591 break;
24592
24593 case 3:
24594 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24595 break;
24596
24597 case 4:
24598 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
24599 break;
24600
24601 default:
24602 gcc_unreachable ();
24603 }
24604
24605 if (! pat)
24606 return 0;
24607
24608 emit_insn (pat);
24609 return target;
24610 }
24611
24612 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24613 insns with vec_merge. */
24614
24615 static rtx
24616 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24617 rtx target)
24618 {
24619 rtx pat;
24620 tree arg0 = CALL_EXPR_ARG (exp, 0);
24621 rtx op1, op0 = expand_normal (arg0);
24622 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24623 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24624
24625 if (optimize || !target
24626 || GET_MODE (target) != tmode
24627 || !insn_data[icode].operand[0].predicate (target, tmode))
24628 target = gen_reg_rtx (tmode);
24629
24630 if (VECTOR_MODE_P (mode0))
24631 op0 = safe_vector_operand (op0, mode0);
24632
24633 if ((optimize && !register_operand (op0, mode0))
24634 || !insn_data[icode].operand[1].predicate (op0, mode0))
24635 op0 = copy_to_mode_reg (mode0, op0);
24636
24637 op1 = op0;
24638 if (!insn_data[icode].operand[2].predicate (op1, mode0))
24639 op1 = copy_to_mode_reg (mode0, op1);
24640
24641 pat = GEN_FCN (icode) (target, op0, op1);
24642 if (! pat)
24643 return 0;
24644 emit_insn (pat);
24645 return target;
24646 }
24647
24648 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
24649
24650 static rtx
24651 ix86_expand_sse_compare (const struct builtin_description *d,
24652 tree exp, rtx target, bool swap)
24653 {
24654 rtx pat;
24655 tree arg0 = CALL_EXPR_ARG (exp, 0);
24656 tree arg1 = CALL_EXPR_ARG (exp, 1);
24657 rtx op0 = expand_normal (arg0);
24658 rtx op1 = expand_normal (arg1);
24659 rtx op2;
24660 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24661 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24662 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24663 enum rtx_code comparison = d->comparison;
24664
24665 if (VECTOR_MODE_P (mode0))
24666 op0 = safe_vector_operand (op0, mode0);
24667 if (VECTOR_MODE_P (mode1))
24668 op1 = safe_vector_operand (op1, mode1);
24669
24670 /* Swap operands if we have a comparison that isn't available in
24671 hardware. */
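/* For example, CMPPS/CMPPD have no direct GT/GE predicate encodings, so
such comparisons are emitted as LT/LE with the operands swapped.  */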
24672 if (swap)
24673 {
24674 rtx tmp = gen_reg_rtx (mode1);
24675 emit_move_insn (tmp, op1);
24676 op1 = op0;
24677 op0 = tmp;
24678 }
24679
24680 if (optimize || !target
24681 || GET_MODE (target) != tmode
24682 || !insn_data[d->icode].operand[0].predicate (target, tmode))
24683 target = gen_reg_rtx (tmode);
24684
24685 if ((optimize && !register_operand (op0, mode0))
24686 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
24687 op0 = copy_to_mode_reg (mode0, op0);
24688 if ((optimize && !register_operand (op1, mode1))
24689 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
24690 op1 = copy_to_mode_reg (mode1, op1);
24691
24692 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24693 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24694 if (! pat)
24695 return 0;
24696 emit_insn (pat);
24697 return target;
24698 }
24699
24700 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
24701
24702 static rtx
24703 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24704 rtx target)
24705 {
24706 rtx pat;
24707 tree arg0 = CALL_EXPR_ARG (exp, 0);
24708 tree arg1 = CALL_EXPR_ARG (exp, 1);
24709 rtx op0 = expand_normal (arg0);
24710 rtx op1 = expand_normal (arg1);
24711 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24712 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24713 enum rtx_code comparison = d->comparison;
24714
24715 if (VECTOR_MODE_P (mode0))
24716 op0 = safe_vector_operand (op0, mode0);
24717 if (VECTOR_MODE_P (mode1))
24718 op1 = safe_vector_operand (op1, mode1);
24719
24720 /* Swap operands if we have a comparison that isn't available in
24721 hardware. */
24722 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24723 {
24724 rtx tmp = op1;
24725 op1 = op0;
24726 op0 = tmp;
24727 }
24728
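/* The boolean result is built by zeroing an SImode pseudo and then setting
its low QImode part from the flags comparison below, so the returned
value is already zero-extended.  */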
24729 target = gen_reg_rtx (SImode);
24730 emit_move_insn (target, const0_rtx);
24731 target = gen_rtx_SUBREG (QImode, target, 0);
24732
24733 if ((optimize && !register_operand (op0, mode0))
24734 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24735 op0 = copy_to_mode_reg (mode0, op0);
24736 if ((optimize && !register_operand (op1, mode1))
24737 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24738 op1 = copy_to_mode_reg (mode1, op1);
24739
24740 pat = GEN_FCN (d->icode) (op0, op1);
24741 if (! pat)
24742 return 0;
24743 emit_insn (pat);
24744 emit_insn (gen_rtx_SET (VOIDmode,
24745 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24746 gen_rtx_fmt_ee (comparison, QImode,
24747 SET_DEST (pat),
24748 const0_rtx)));
24749
24750 return SUBREG_REG (target);
24751 }
24752
24753 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24754
24755 static rtx
24756 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24757 rtx target)
24758 {
24759 rtx pat;
24760 tree arg0 = CALL_EXPR_ARG (exp, 0);
24761 tree arg1 = CALL_EXPR_ARG (exp, 1);
24762 rtx op0 = expand_normal (arg0);
24763 rtx op1 = expand_normal (arg1);
24764 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24765 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24766 enum rtx_code comparison = d->comparison;
24767
24768 if (VECTOR_MODE_P (mode0))
24769 op0 = safe_vector_operand (op0, mode0);
24770 if (VECTOR_MODE_P (mode1))
24771 op1 = safe_vector_operand (op1, mode1);
24772
24773 target = gen_reg_rtx (SImode);
24774 emit_move_insn (target, const0_rtx);
24775 target = gen_rtx_SUBREG (QImode, target, 0);
24776
24777 if ((optimize && !register_operand (op0, mode0))
24778 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24779 op0 = copy_to_mode_reg (mode0, op0);
24780 if ((optimize && !register_operand (op1, mode1))
24781 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24782 op1 = copy_to_mode_reg (mode1, op1);
24783
24784 pat = GEN_FCN (d->icode) (op0, op1);
24785 if (! pat)
24786 return 0;
24787 emit_insn (pat);
24788 emit_insn (gen_rtx_SET (VOIDmode,
24789 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24790 gen_rtx_fmt_ee (comparison, QImode,
24791 SET_DEST (pat),
24792 const0_rtx)));
24793
24794 return SUBREG_REG (target);
24795 }
24796
24797 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24798
24799 static rtx
24800 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24801 tree exp, rtx target)
24802 {
24803 rtx pat;
24804 tree arg0 = CALL_EXPR_ARG (exp, 0);
24805 tree arg1 = CALL_EXPR_ARG (exp, 1);
24806 tree arg2 = CALL_EXPR_ARG (exp, 2);
24807 tree arg3 = CALL_EXPR_ARG (exp, 3);
24808 tree arg4 = CALL_EXPR_ARG (exp, 4);
24809 rtx scratch0, scratch1;
24810 rtx op0 = expand_normal (arg0);
24811 rtx op1 = expand_normal (arg1);
24812 rtx op2 = expand_normal (arg2);
24813 rtx op3 = expand_normal (arg3);
24814 rtx op4 = expand_normal (arg4);
24815 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24816
24817 tmode0 = insn_data[d->icode].operand[0].mode;
24818 tmode1 = insn_data[d->icode].operand[1].mode;
24819 modev2 = insn_data[d->icode].operand[2].mode;
24820 modei3 = insn_data[d->icode].operand[3].mode;
24821 modev4 = insn_data[d->icode].operand[4].mode;
24822 modei5 = insn_data[d->icode].operand[5].mode;
24823 modeimm = insn_data[d->icode].operand[6].mode;
24824
24825 if (VECTOR_MODE_P (modev2))
24826 op0 = safe_vector_operand (op0, modev2);
24827 if (VECTOR_MODE_P (modev4))
24828 op2 = safe_vector_operand (op2, modev4);
24829
24830 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24831 op0 = copy_to_mode_reg (modev2, op0);
24832 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
24833 op1 = copy_to_mode_reg (modei3, op1);
24834 if ((optimize && !register_operand (op2, modev4))
24835 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
24836 op2 = copy_to_mode_reg (modev4, op2);
24837 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
24838 op3 = copy_to_mode_reg (modei5, op3);
24839
24840 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
24841 {
24842 error ("the fifth argument must be a 8-bit immediate");
24843 return const0_rtx;
24844 }
24845
24846 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24847 {
24848 if (optimize || !target
24849 || GET_MODE (target) != tmode0
24850 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24851 target = gen_reg_rtx (tmode0);
24852
24853 scratch1 = gen_reg_rtx (tmode1);
24854
24855 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24856 }
24857 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24858 {
24859 if (optimize || !target
24860 || GET_MODE (target) != tmode1
24861 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24862 target = gen_reg_rtx (tmode1);
24863
24864 scratch0 = gen_reg_rtx (tmode0);
24865
24866 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24867 }
24868 else
24869 {
24870 gcc_assert (d->flag);
24871
24872 scratch0 = gen_reg_rtx (tmode0);
24873 scratch1 = gen_reg_rtx (tmode1);
24874
24875 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24876 }
24877
24878 if (! pat)
24879 return 0;
24880
24881 emit_insn (pat);
24882
24883 if (d->flag)
24884 {
24885 target = gen_reg_rtx (SImode);
24886 emit_move_insn (target, const0_rtx);
24887 target = gen_rtx_SUBREG (QImode, target, 0);
24888
24889 emit_insn
24890 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24891 gen_rtx_fmt_ee (EQ, QImode,
24892 gen_rtx_REG ((enum machine_mode) d->flag,
24893 FLAGS_REG),
24894 const0_rtx)));
24895 return SUBREG_REG (target);
24896 }
24897 else
24898 return target;
24899 }
24900
24901
24902 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24903
24904 static rtx
24905 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24906 tree exp, rtx target)
24907 {
24908 rtx pat;
24909 tree arg0 = CALL_EXPR_ARG (exp, 0);
24910 tree arg1 = CALL_EXPR_ARG (exp, 1);
24911 tree arg2 = CALL_EXPR_ARG (exp, 2);
24912 rtx scratch0, scratch1;
24913 rtx op0 = expand_normal (arg0);
24914 rtx op1 = expand_normal (arg1);
24915 rtx op2 = expand_normal (arg2);
24916 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24917
24918 tmode0 = insn_data[d->icode].operand[0].mode;
24919 tmode1 = insn_data[d->icode].operand[1].mode;
24920 modev2 = insn_data[d->icode].operand[2].mode;
24921 modev3 = insn_data[d->icode].operand[3].mode;
24922 modeimm = insn_data[d->icode].operand[4].mode;
24923
24924 if (VECTOR_MODE_P (modev2))
24925 op0 = safe_vector_operand (op0, modev2);
24926 if (VECTOR_MODE_P (modev3))
24927 op1 = safe_vector_operand (op1, modev3);
24928
24929 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24930 op0 = copy_to_mode_reg (modev2, op0);
24931 if ((optimize && !register_operand (op1, modev3))
24932 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
24933 op1 = copy_to_mode_reg (modev3, op1);
24934
24935 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
24936 {
24937 error ("the third argument must be a 8-bit immediate");
24938 return const0_rtx;
24939 }
24940
24941 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24942 {
24943 if (optimize || !target
24944 || GET_MODE (target) != tmode0
24945 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24946 target = gen_reg_rtx (tmode0);
24947
24948 scratch1 = gen_reg_rtx (tmode1);
24949
24950 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24951 }
24952 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24953 {
24954 if (optimize || !target
24955 || GET_MODE (target) != tmode1
24956 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24957 target = gen_reg_rtx (tmode1);
24958
24959 scratch0 = gen_reg_rtx (tmode0);
24960
24961 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24962 }
24963 else
24964 {
24965 gcc_assert (d->flag);
24966
24967 scratch0 = gen_reg_rtx (tmode0);
24968 scratch1 = gen_reg_rtx (tmode1);
24969
24970 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24971 }
24972
24973 if (! pat)
24974 return 0;
24975
24976 emit_insn (pat);
24977
24978 if (d->flag)
24979 {
24980 target = gen_reg_rtx (SImode);
24981 emit_move_insn (target, const0_rtx);
24982 target = gen_rtx_SUBREG (QImode, target, 0);
24983
24984 emit_insn
24985 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24986 gen_rtx_fmt_ee (EQ, QImode,
24987 gen_rtx_REG ((enum machine_mode) d->flag,
24988 FLAGS_REG),
24989 const0_rtx)));
24990 return SUBREG_REG (target);
24991 }
24992 else
24993 return target;
24994 }
24995
24996 /* Subroutine of ix86_expand_builtin to take care of insns with
24997 variable number of operands. */
24998
24999 static rtx
25000 ix86_expand_args_builtin (const struct builtin_description *d,
25001 tree exp, rtx target)
25002 {
25003 rtx pat, real_target;
25004 unsigned int i, nargs;
25005 unsigned int nargs_constant = 0;
25006 int num_memory = 0;
25007 struct
25008 {
25009 rtx op;
25010 enum machine_mode mode;
25011 } args[4];
25012 bool last_arg_count = false;
25013 enum insn_code icode = d->icode;
25014 const struct insn_data_d *insn_p = &insn_data[icode];
25015 enum machine_mode tmode = insn_p->operand[0].mode;
25016 enum machine_mode rmode = VOIDmode;
25017 bool swap = false;
25018 enum rtx_code comparison = d->comparison;
25019
25020 switch ((enum ix86_builtin_func_type) d->flag)
25021 {
25022 case INT_FTYPE_V8SF_V8SF_PTEST:
25023 case INT_FTYPE_V4DI_V4DI_PTEST:
25024 case INT_FTYPE_V4DF_V4DF_PTEST:
25025 case INT_FTYPE_V4SF_V4SF_PTEST:
25026 case INT_FTYPE_V2DI_V2DI_PTEST:
25027 case INT_FTYPE_V2DF_V2DF_PTEST:
25028 return ix86_expand_sse_ptest (d, exp, target);
25029 case FLOAT128_FTYPE_FLOAT128:
25030 case FLOAT_FTYPE_FLOAT:
25031 case INT_FTYPE_INT:
25032 case UINT64_FTYPE_INT:
25033 case UINT16_FTYPE_UINT16:
25034 case INT64_FTYPE_INT64:
25035 case INT64_FTYPE_V4SF:
25036 case INT64_FTYPE_V2DF:
25037 case INT_FTYPE_V16QI:
25038 case INT_FTYPE_V8QI:
25039 case INT_FTYPE_V8SF:
25040 case INT_FTYPE_V4DF:
25041 case INT_FTYPE_V4SF:
25042 case INT_FTYPE_V2DF:
25043 case V16QI_FTYPE_V16QI:
25044 case V8SI_FTYPE_V8SF:
25045 case V8SI_FTYPE_V4SI:
25046 case V8HI_FTYPE_V8HI:
25047 case V8HI_FTYPE_V16QI:
25048 case V8QI_FTYPE_V8QI:
25049 case V8SF_FTYPE_V8SF:
25050 case V8SF_FTYPE_V8SI:
25051 case V8SF_FTYPE_V4SF:
25052 case V8SF_FTYPE_V8HI:
25053 case V4SI_FTYPE_V4SI:
25054 case V4SI_FTYPE_V16QI:
25055 case V4SI_FTYPE_V4SF:
25056 case V4SI_FTYPE_V8SI:
25057 case V4SI_FTYPE_V8HI:
25058 case V4SI_FTYPE_V4DF:
25059 case V4SI_FTYPE_V2DF:
25060 case V4HI_FTYPE_V4HI:
25061 case V4DF_FTYPE_V4DF:
25062 case V4DF_FTYPE_V4SI:
25063 case V4DF_FTYPE_V4SF:
25064 case V4DF_FTYPE_V2DF:
25065 case V4SF_FTYPE_V4SF:
25066 case V4SF_FTYPE_V4SI:
25067 case V4SF_FTYPE_V8SF:
25068 case V4SF_FTYPE_V4DF:
25069 case V4SF_FTYPE_V8HI:
25070 case V4SF_FTYPE_V2DF:
25071 case V2DI_FTYPE_V2DI:
25072 case V2DI_FTYPE_V16QI:
25073 case V2DI_FTYPE_V8HI:
25074 case V2DI_FTYPE_V4SI:
25075 case V2DF_FTYPE_V2DF:
25076 case V2DF_FTYPE_V4SI:
25077 case V2DF_FTYPE_V4DF:
25078 case V2DF_FTYPE_V4SF:
25079 case V2DF_FTYPE_V2SI:
25080 case V2SI_FTYPE_V2SI:
25081 case V2SI_FTYPE_V4SF:
25082 case V2SI_FTYPE_V2SF:
25083 case V2SI_FTYPE_V2DF:
25084 case V2SF_FTYPE_V2SF:
25085 case V2SF_FTYPE_V2SI:
25086 nargs = 1;
25087 break;
25088 case V4SF_FTYPE_V4SF_VEC_MERGE:
25089 case V2DF_FTYPE_V2DF_VEC_MERGE:
25090 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
25091 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
25092 case V16QI_FTYPE_V16QI_V16QI:
25093 case V16QI_FTYPE_V8HI_V8HI:
25094 case V8QI_FTYPE_V8QI_V8QI:
25095 case V8QI_FTYPE_V4HI_V4HI:
25096 case V8HI_FTYPE_V8HI_V8HI:
25097 case V8HI_FTYPE_V16QI_V16QI:
25098 case V8HI_FTYPE_V4SI_V4SI:
25099 case V8SF_FTYPE_V8SF_V8SF:
25100 case V8SF_FTYPE_V8SF_V8SI:
25101 case V4SI_FTYPE_V4SI_V4SI:
25102 case V4SI_FTYPE_V8HI_V8HI:
25103 case V4SI_FTYPE_V4SF_V4SF:
25104 case V4SI_FTYPE_V2DF_V2DF:
25105 case V4HI_FTYPE_V4HI_V4HI:
25106 case V4HI_FTYPE_V8QI_V8QI:
25107 case V4HI_FTYPE_V2SI_V2SI:
25108 case V4DF_FTYPE_V4DF_V4DF:
25109 case V4DF_FTYPE_V4DF_V4DI:
25110 case V4SF_FTYPE_V4SF_V4SF:
25111 case V4SF_FTYPE_V4SF_V4SI:
25112 case V4SF_FTYPE_V4SF_V2SI:
25113 case V4SF_FTYPE_V4SF_V2DF:
25114 case V4SF_FTYPE_V4SF_DI:
25115 case V4SF_FTYPE_V4SF_SI:
25116 case V2DI_FTYPE_V2DI_V2DI:
25117 case V2DI_FTYPE_V16QI_V16QI:
25118 case V2DI_FTYPE_V4SI_V4SI:
25119 case V2DI_FTYPE_V2DI_V16QI:
25120 case V2DI_FTYPE_V2DF_V2DF:
25121 case V2SI_FTYPE_V2SI_V2SI:
25122 case V2SI_FTYPE_V4HI_V4HI:
25123 case V2SI_FTYPE_V2SF_V2SF:
25124 case V2DF_FTYPE_V2DF_V2DF:
25125 case V2DF_FTYPE_V2DF_V4SF:
25126 case V2DF_FTYPE_V2DF_V2DI:
25127 case V2DF_FTYPE_V2DF_DI:
25128 case V2DF_FTYPE_V2DF_SI:
25129 case V2SF_FTYPE_V2SF_V2SF:
25130 case V1DI_FTYPE_V1DI_V1DI:
25131 case V1DI_FTYPE_V8QI_V8QI:
25132 case V1DI_FTYPE_V2SI_V2SI:
25133 if (comparison == UNKNOWN)
25134 return ix86_expand_binop_builtin (icode, exp, target);
25135 nargs = 2;
25136 break;
25137 case V4SF_FTYPE_V4SF_V4SF_SWAP:
25138 case V2DF_FTYPE_V2DF_V2DF_SWAP:
25139 gcc_assert (comparison != UNKNOWN);
25140 nargs = 2;
25141 swap = true;
25142 break;
25143 case V8HI_FTYPE_V8HI_V8HI_COUNT:
25144 case V8HI_FTYPE_V8HI_SI_COUNT:
25145 case V4SI_FTYPE_V4SI_V4SI_COUNT:
25146 case V4SI_FTYPE_V4SI_SI_COUNT:
25147 case V4HI_FTYPE_V4HI_V4HI_COUNT:
25148 case V4HI_FTYPE_V4HI_SI_COUNT:
25149 case V2DI_FTYPE_V2DI_V2DI_COUNT:
25150 case V2DI_FTYPE_V2DI_SI_COUNT:
25151 case V2SI_FTYPE_V2SI_V2SI_COUNT:
25152 case V2SI_FTYPE_V2SI_SI_COUNT:
25153 case V1DI_FTYPE_V1DI_V1DI_COUNT:
25154 case V1DI_FTYPE_V1DI_SI_COUNT:
25155 nargs = 2;
25156 last_arg_count = true;
25157 break;
25158 case UINT64_FTYPE_UINT64_UINT64:
25159 case UINT_FTYPE_UINT_UINT:
25160 case UINT_FTYPE_UINT_USHORT:
25161 case UINT_FTYPE_UINT_UCHAR:
25162 case UINT16_FTYPE_UINT16_INT:
25163 case UINT8_FTYPE_UINT8_INT:
25164 nargs = 2;
25165 break;
25166 case V2DI_FTYPE_V2DI_INT_CONVERT:
25167 nargs = 2;
25168 rmode = V1TImode;
25169 nargs_constant = 1;
25170 break;
25171 case V8HI_FTYPE_V8HI_INT:
25172 case V8HI_FTYPE_V8SF_INT:
25173 case V8HI_FTYPE_V4SF_INT:
25174 case V8SF_FTYPE_V8SF_INT:
25175 case V4SI_FTYPE_V4SI_INT:
25176 case V4SI_FTYPE_V8SI_INT:
25177 case V4HI_FTYPE_V4HI_INT:
25178 case V4DF_FTYPE_V4DF_INT:
25179 case V4SF_FTYPE_V4SF_INT:
25180 case V4SF_FTYPE_V8SF_INT:
25181 case V2DI_FTYPE_V2DI_INT:
25182 case V2DF_FTYPE_V2DF_INT:
25183 case V2DF_FTYPE_V4DF_INT:
25184 nargs = 2;
25185 nargs_constant = 1;
25186 break;
25187 case V16QI_FTYPE_V16QI_V16QI_V16QI:
25188 case V8SF_FTYPE_V8SF_V8SF_V8SF:
25189 case V4DF_FTYPE_V4DF_V4DF_V4DF:
25190 case V4SF_FTYPE_V4SF_V4SF_V4SF:
25191 case V2DF_FTYPE_V2DF_V2DF_V2DF:
25192 nargs = 3;
25193 break;
25194 case V16QI_FTYPE_V16QI_V16QI_INT:
25195 case V8HI_FTYPE_V8HI_V8HI_INT:
25196 case V8SI_FTYPE_V8SI_V8SI_INT:
25197 case V8SI_FTYPE_V8SI_V4SI_INT:
25198 case V8SF_FTYPE_V8SF_V8SF_INT:
25199 case V8SF_FTYPE_V8SF_V4SF_INT:
25200 case V4SI_FTYPE_V4SI_V4SI_INT:
25201 case V4DF_FTYPE_V4DF_V4DF_INT:
25202 case V4DF_FTYPE_V4DF_V2DF_INT:
25203 case V4SF_FTYPE_V4SF_V4SF_INT:
25204 case V2DI_FTYPE_V2DI_V2DI_INT:
25205 case V2DF_FTYPE_V2DF_V2DF_INT:
25206 nargs = 3;
25207 nargs_constant = 1;
25208 break;
25209 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
25210 nargs = 3;
25211 rmode = V2DImode;
25212 nargs_constant = 1;
25213 break;
25214 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
25215 nargs = 3;
25216 rmode = DImode;
25217 nargs_constant = 1;
25218 break;
25219 case V2DI_FTYPE_V2DI_UINT_UINT:
25220 nargs = 3;
25221 nargs_constant = 2;
25222 break;
25223 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
25224 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
25225 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
25226 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
25227 nargs = 4;
25228 nargs_constant = 1;
25229 break;
25230 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
25231 nargs = 4;
25232 nargs_constant = 2;
25233 break;
25234 default:
25235 gcc_unreachable ();
25236 }
25237
25238 gcc_assert (nargs <= ARRAY_SIZE (args));
25239
25240 if (comparison != UNKNOWN)
25241 {
25242 gcc_assert (nargs == 2);
25243 return ix86_expand_sse_compare (d, exp, target, swap);
25244 }
25245
25246 if (rmode == VOIDmode || rmode == tmode)
25247 {
25248 if (optimize
25249 || target == 0
25250 || GET_MODE (target) != tmode
25251 || !insn_p->operand[0].predicate (target, tmode))
25252 target = gen_reg_rtx (tmode);
25253 real_target = target;
25254 }
25255 else
25256 {
25257 target = gen_reg_rtx (rmode);
25258 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
25259 }
25260
25261 for (i = 0; i < nargs; i++)
25262 {
25263 tree arg = CALL_EXPR_ARG (exp, i);
25264 rtx op = expand_normal (arg);
25265 enum machine_mode mode = insn_p->operand[i + 1].mode;
25266 bool match = insn_p->operand[i + 1].predicate (op, mode);
25267
25268 if (last_arg_count && (i + 1) == nargs)
25269 {
25270 /* SIMD shift insns take either an 8-bit immediate or a
25271 register as the count, but the builtin functions take an
25272 int. If the count does not match, put it in a register. */
25273 if (!match)
25274 {
25275 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
25276 if (!insn_p->operand[i + 1].predicate (op, mode))
25277 op = copy_to_reg (op);
25278 }
25279 }
25280 else if ((nargs - i) <= nargs_constant)
25281 {
25282 if (!match)
25283 switch (icode)
25284 {
25285 case CODE_FOR_sse4_1_roundpd:
25286 case CODE_FOR_sse4_1_roundps:
25287 case CODE_FOR_sse4_1_roundsd:
25288 case CODE_FOR_sse4_1_roundss:
25289 case CODE_FOR_sse4_1_blendps:
25290 case CODE_FOR_avx_blendpd256:
25291 case CODE_FOR_avx_vpermilv4df:
25292 case CODE_FOR_avx_roundpd256:
25293 case CODE_FOR_avx_roundps256:
25294 error ("the last argument must be a 4-bit immediate");
25295 return const0_rtx;
25296
25297 case CODE_FOR_sse4_1_blendpd:
25298 case CODE_FOR_avx_vpermilv2df:
25299 case CODE_FOR_xop_vpermil2v2df3:
25300 case CODE_FOR_xop_vpermil2v4sf3:
25301 case CODE_FOR_xop_vpermil2v4df3:
25302 case CODE_FOR_xop_vpermil2v8sf3:
25303 error ("the last argument must be a 2-bit immediate");
25304 return const0_rtx;
25305
25306 case CODE_FOR_avx_vextractf128v4df:
25307 case CODE_FOR_avx_vextractf128v8sf:
25308 case CODE_FOR_avx_vextractf128v8si:
25309 case CODE_FOR_avx_vinsertf128v4df:
25310 case CODE_FOR_avx_vinsertf128v8sf:
25311 case CODE_FOR_avx_vinsertf128v8si:
25312 error ("the last argument must be a 1-bit immediate");
25313 return const0_rtx;
25314
25315 case CODE_FOR_avx_cmpsdv2df3:
25316 case CODE_FOR_avx_cmpssv4sf3:
25317 case CODE_FOR_avx_cmppdv2df3:
25318 case CODE_FOR_avx_cmppsv4sf3:
25319 case CODE_FOR_avx_cmppdv4df3:
25320 case CODE_FOR_avx_cmppsv8sf3:
25321 error ("the last argument must be a 5-bit immediate");
25322 return const0_rtx;
25323
25324 default:
25325 switch (nargs_constant)
25326 {
25327 case 2:
25328 if ((nargs - i) == nargs_constant)
25329 {
25330 error ("the next to last argument must be an 8-bit immediate");
25331 break;
25332 }
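/* FALLTHRU */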
25333 case 1:
25334 error ("the last argument must be an 8-bit immediate");
25335 break;
25336 default:
25337 gcc_unreachable ();
25338 }
25339 return const0_rtx;
25340 }
25341 }
25342 else
25343 {
25344 if (VECTOR_MODE_P (mode))
25345 op = safe_vector_operand (op, mode);
25346
25347 /* If we aren't optimizing, only allow one memory operand to
25348 be generated. */
25349 if (memory_operand (op, mode))
25350 num_memory++;
25351
25352 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
25353 {
25354 if (optimize || !match || num_memory > 1)
25355 op = copy_to_mode_reg (mode, op);
25356 }
25357 else
25358 {
25359 op = copy_to_reg (op);
25360 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
25361 }
25362 }
25363
25364 args[i].op = op;
25365 args[i].mode = mode;
25366 }
25367
25368 switch (nargs)
25369 {
25370 case 1:
25371 pat = GEN_FCN (icode) (real_target, args[0].op);
25372 break;
25373 case 2:
25374 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
25375 break;
25376 case 3:
25377 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25378 args[2].op);
25379 break;
25380 case 4:
25381 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25382 args[2].op, args[3].op);
25383 break;
25384 default:
25385 gcc_unreachable ();
25386 }
25387
25388 if (! pat)
25389 return 0;
25390
25391 emit_insn (pat);
25392 return target;
25393 }
25394
25395 /* Subroutine of ix86_expand_builtin to take care of special insns
25396 with variable number of operands. */
25397
25398 static rtx
25399 ix86_expand_special_args_builtin (const struct builtin_description *d,
25400 tree exp, rtx target)
25401 {
25402 tree arg;
25403 rtx pat, op;
25404 unsigned int i, nargs, arg_adjust, memory;
25405 struct
25406 {
25407 rtx op;
25408 enum machine_mode mode;
25409 } args[3];
25410 enum insn_code icode = d->icode;
25411 bool last_arg_constant = false;
25412 const struct insn_data_d *insn_p = &insn_data[icode];
25413 enum machine_mode tmode = insn_p->operand[0].mode;
25414 enum { load, store } klass;
25415
25416 switch ((enum ix86_builtin_func_type) d->flag)
25417 {
25418 case VOID_FTYPE_VOID:
25419 emit_insn (GEN_FCN (icode) (target));
25420 return 0;
25421 case VOID_FTYPE_UINT64:
25422 case VOID_FTYPE_UNSIGNED:
25423 nargs = 0;
25424 klass = store;
25425 memory = 0;
25426 break;
25428 case UINT64_FTYPE_VOID:
25429 case UNSIGNED_FTYPE_VOID:
25430 case UINT16_FTYPE_VOID:
25431 nargs = 0;
25432 klass = load;
25433 memory = 0;
25434 break;
25435 case UINT64_FTYPE_PUNSIGNED:
25436 case V2DI_FTYPE_PV2DI:
25437 case V32QI_FTYPE_PCCHAR:
25438 case V16QI_FTYPE_PCCHAR:
25439 case V8SF_FTYPE_PCV4SF:
25440 case V8SF_FTYPE_PCFLOAT:
25441 case V4SF_FTYPE_PCFLOAT:
25442 case V4DF_FTYPE_PCV2DF:
25443 case V4DF_FTYPE_PCDOUBLE:
25444 case V2DF_FTYPE_PCDOUBLE:
25445 case VOID_FTYPE_PVOID:
25446 nargs = 1;
25447 klass = load;
25448 memory = 0;
25449 break;
25450 case VOID_FTYPE_PV2SF_V4SF:
25451 case VOID_FTYPE_PV4DI_V4DI:
25452 case VOID_FTYPE_PV2DI_V2DI:
25453 case VOID_FTYPE_PCHAR_V32QI:
25454 case VOID_FTYPE_PCHAR_V16QI:
25455 case VOID_FTYPE_PFLOAT_V8SF:
25456 case VOID_FTYPE_PFLOAT_V4SF:
25457 case VOID_FTYPE_PDOUBLE_V4DF:
25458 case VOID_FTYPE_PDOUBLE_V2DF:
25459 case VOID_FTYPE_PULONGLONG_ULONGLONG:
25460 case VOID_FTYPE_PINT_INT:
25461 nargs = 1;
25462 klass = store;
25463 /* Reserve memory operand for target. */
25464 memory = ARRAY_SIZE (args);
25465 break;
25466 case V4SF_FTYPE_V4SF_PCV2SF:
25467 case V2DF_FTYPE_V2DF_PCDOUBLE:
25468 nargs = 2;
25469 klass = load;
25470 memory = 1;
25471 break;
25472 case V8SF_FTYPE_PCV8SF_V8SF:
25473 case V4DF_FTYPE_PCV4DF_V4DF:
25474 case V4SF_FTYPE_PCV4SF_V4SF:
25475 case V2DF_FTYPE_PCV2DF_V2DF:
25476 nargs = 2;
25477 klass = load;
25478 memory = 0;
25479 break;
25480 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25481 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25482 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25483 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25484 nargs = 2;
25485 klass = store;
25486 /* Reserve memory operand for target. */
25487 memory = ARRAY_SIZE (args);
25488 break;
25489 case VOID_FTYPE_UINT_UINT_UINT:
25490 case VOID_FTYPE_UINT64_UINT_UINT:
25491 case UCHAR_FTYPE_UINT_UINT_UINT:
25492 case UCHAR_FTYPE_UINT64_UINT_UINT:
25493 nargs = 3;
25494 klass = load;
25495 memory = ARRAY_SIZE (args);
25496 last_arg_constant = true;
25497 break;
25498 default:
25499 gcc_unreachable ();
25500 }
25501
25502 gcc_assert (nargs <= ARRAY_SIZE (args));
25503
25504 if (klass == store)
25505 {
25506 arg = CALL_EXPR_ARG (exp, 0);
25507 op = expand_normal (arg);
25508 gcc_assert (target == 0);
25509 if (memory)
25510 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25511 else
25512 target = force_reg (tmode, op);
25513 arg_adjust = 1;
25514 }
25515 else
25516 {
25517 arg_adjust = 0;
25518 if (optimize
25519 || target == 0
25520 || GET_MODE (target) != tmode
25521 || !insn_p->operand[0].predicate (target, tmode))
25522 target = gen_reg_rtx (tmode);
25523 }
25524
25525 for (i = 0; i < nargs; i++)
25526 {
25527 enum machine_mode mode = insn_p->operand[i + 1].mode;
25528 bool match;
25529
25530 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25531 op = expand_normal (arg);
25532 match = insn_p->operand[i + 1].predicate (op, mode);
25533
25534 if (last_arg_constant && (i + 1) == nargs)
25535 {
25536 if (!match)
25537 {
25538 if (icode == CODE_FOR_lwp_lwpvalsi3
25539 || icode == CODE_FOR_lwp_lwpinssi3
25540 || icode == CODE_FOR_lwp_lwpvaldi3
25541 || icode == CODE_FOR_lwp_lwpinsdi3)
25542 error ("the last argument must be a 32-bit immediate");
25543 else
25544 error ("the last argument must be an 8-bit immediate");
25545 return const0_rtx;
25546 }
25547 }
25548 else
25549 {
25550 if (i == memory)
25551 {
25552 /* This must be the memory operand. */
25553 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25554 gcc_assert (GET_MODE (op) == mode
25555 || GET_MODE (op) == VOIDmode);
25556 }
25557 else
25558 {
25559 /* This must be a register operand. */
25560 if (VECTOR_MODE_P (mode))
25561 op = safe_vector_operand (op, mode);
25562
25563 gcc_assert (GET_MODE (op) == mode
25564 || GET_MODE (op) == VOIDmode);
25565 op = copy_to_mode_reg (mode, op);
25566 }
25567 }
25568
25569 args[i].op = op;
25570 args[i].mode = mode;
25571 }
25572
25573 switch (nargs)
25574 {
25575 case 0:
25576 pat = GEN_FCN (icode) (target);
25577 break;
25578 case 1:
25579 pat = GEN_FCN (icode) (target, args[0].op);
25580 break;
25581 case 2:
25582 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25583 break;
25584 case 3:
25585 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25586 break;
25587 default:
25588 gcc_unreachable ();
25589 }
25590
25591 if (! pat)
25592 return 0;
25593 emit_insn (pat);
25594 return klass == store ? 0 : target;
25595 }
25596
25597 /* Return the integer constant in ARG. Constrain it to be in the range
25598 of the subparts of VEC_TYPE; issue an error if not. */
25599
25600 static int
25601 get_element_number (tree vec_type, tree arg)
25602 {
25603 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25604
25605 if (!host_integerp (arg, 1)
25606 || (elt = tree_low_cst (arg, 1), elt > max))
25607 {
25608 error ("selector must be an integer constant in the range 0..%wi", max);
25609 return 0;
25610 }
25611
25612 return elt;
25613 }
25614
25615 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25616 ix86_expand_vector_init. We DO have language-level syntax for this, in
25617 the form of (type){ init-list }. Except that since we can't place emms
25618 instructions from inside the compiler, we can't allow the use of MMX
25619 registers unless the user explicitly asks for it. So we do *not* define
25620 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25621 we have builtins invoked by mmintrin.h that give us license to emit
25622 these sorts of instructions. */
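/* For instance, mmintrin.h implements _mm_set_pi32 in terms of
__builtin_ia32_vec_init_v2si (IX86_BUILTIN_VEC_INIT_V2SI), which is
expanded here (illustrative example; see mmintrin.h for the exact
wrappers).  */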
25623
25624 static rtx
25625 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25626 {
25627 enum machine_mode tmode = TYPE_MODE (type);
25628 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25629 int i, n_elt = GET_MODE_NUNITS (tmode);
25630 rtvec v = rtvec_alloc (n_elt);
25631
25632 gcc_assert (VECTOR_MODE_P (tmode));
25633 gcc_assert (call_expr_nargs (exp) == n_elt);
25634
25635 for (i = 0; i < n_elt; ++i)
25636 {
25637 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25638 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25639 }
25640
25641 if (!target || !register_operand (target, tmode))
25642 target = gen_reg_rtx (tmode);
25643
25644 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25645 return target;
25646 }
25647
25648 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25649 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25650 had a language-level syntax for referencing vector elements. */
25651
25652 static rtx
25653 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25654 {
25655 enum machine_mode tmode, mode0;
25656 tree arg0, arg1;
25657 int elt;
25658 rtx op0;
25659
25660 arg0 = CALL_EXPR_ARG (exp, 0);
25661 arg1 = CALL_EXPR_ARG (exp, 1);
25662
25663 op0 = expand_normal (arg0);
25664 elt = get_element_number (TREE_TYPE (arg0), arg1);
25665
25666 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25667 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25668 gcc_assert (VECTOR_MODE_P (mode0));
25669
25670 op0 = force_reg (mode0, op0);
25671
25672 if (optimize || !target || !register_operand (target, tmode))
25673 target = gen_reg_rtx (tmode);
25674
25675 ix86_expand_vector_extract (true, target, op0, elt);
25676
25677 return target;
25678 }
25679
25680 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25681 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25682 a language-level syntax for referencing vector elements. */
25683
25684 static rtx
25685 ix86_expand_vec_set_builtin (tree exp)
25686 {
25687 enum machine_mode tmode, mode1;
25688 tree arg0, arg1, arg2;
25689 int elt;
25690 rtx op0, op1, target;
25691
25692 arg0 = CALL_EXPR_ARG (exp, 0);
25693 arg1 = CALL_EXPR_ARG (exp, 1);
25694 arg2 = CALL_EXPR_ARG (exp, 2);
25695
25696 tmode = TYPE_MODE (TREE_TYPE (arg0));
25697 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25698 gcc_assert (VECTOR_MODE_P (tmode));
25699
25700 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25701 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25702 elt = get_element_number (TREE_TYPE (arg0), arg2);
25703
25704 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25705 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25706
25707 op0 = force_reg (tmode, op0);
25708 op1 = force_reg (mode1, op1);
25709
25710 /* OP0 is the source of these builtin functions and shouldn't be
25711 modified. Create a copy, use it and return it as target. */
25712 target = gen_reg_rtx (tmode);
25713 emit_move_insn (target, op0);
25714 ix86_expand_vector_set (true, target, op1, elt);
25715
25716 return target;
25717 }
25718
25719 /* Expand an expression EXP that calls a built-in function,
25720 with result going to TARGET if that's convenient
25721 (and in mode MODE if that's convenient).
25722 SUBTARGET may be used as the target for computing one of EXP's operands.
25723 IGNORE is nonzero if the value is to be ignored. */
25724
25725 static rtx
25726 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25727 enum machine_mode mode ATTRIBUTE_UNUSED,
25728 int ignore ATTRIBUTE_UNUSED)
25729 {
25730 const struct builtin_description *d;
25731 size_t i;
25732 enum insn_code icode;
25733 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25734 tree arg0, arg1, arg2;
25735 rtx op0, op1, op2, pat;
25736 enum machine_mode mode0, mode1, mode2;
25737 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25738
25739 /* Determine whether the builtin function is available under the current ISA.
25740 Originally the builtin was not created if it wasn't applicable to the
25741 current ISA based on the command line switches. With function specific
25742 options, we need to check in the context of the function making the call
25743 whether it is supported. */
25744 if (ix86_builtins_isa[fcode].isa
25745 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25746 {
25747 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25748 NULL, NULL, false);
25749
25750 if (!opts)
25751 error ("%qE needs unknown isa option", fndecl);
25752 else
25753 {
25754 gcc_assert (opts != NULL);
25755 error ("%qE needs isa option %s", fndecl, opts);
25756 free (opts);
25757 }
25758 return const0_rtx;
25759 }
25760
25761 switch (fcode)
25762 {
25763 case IX86_BUILTIN_MASKMOVQ:
25764 case IX86_BUILTIN_MASKMOVDQU:
25765 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25766 ? CODE_FOR_mmx_maskmovq
25767 : CODE_FOR_sse2_maskmovdqu);
25768 /* Note the arg order is different from the operand order. */
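/* The builtin is called as (data, mask, address), while the insn pattern
expects (mem, data, mask).  */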
25769 arg1 = CALL_EXPR_ARG (exp, 0);
25770 arg2 = CALL_EXPR_ARG (exp, 1);
25771 arg0 = CALL_EXPR_ARG (exp, 2);
25772 op0 = expand_normal (arg0);
25773 op1 = expand_normal (arg1);
25774 op2 = expand_normal (arg2);
25775 mode0 = insn_data[icode].operand[0].mode;
25776 mode1 = insn_data[icode].operand[1].mode;
25777 mode2 = insn_data[icode].operand[2].mode;
25778
25779 op0 = force_reg (Pmode, op0);
25780 op0 = gen_rtx_MEM (mode1, op0);
25781
25782 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25783 op0 = copy_to_mode_reg (mode0, op0);
25784 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25785 op1 = copy_to_mode_reg (mode1, op1);
25786 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25787 op2 = copy_to_mode_reg (mode2, op2);
25788 pat = GEN_FCN (icode) (op0, op1, op2);
25789 if (! pat)
25790 return 0;
25791 emit_insn (pat);
25792 return 0;
25793
25794 case IX86_BUILTIN_LDMXCSR:
25795 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25796 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25797 emit_move_insn (target, op0);
25798 emit_insn (gen_sse_ldmxcsr (target));
25799 return 0;
25800
25801 case IX86_BUILTIN_STMXCSR:
25802 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25803 emit_insn (gen_sse_stmxcsr (target));
25804 return copy_to_mode_reg (SImode, target);
25805
25806 case IX86_BUILTIN_CLFLUSH:
25807 arg0 = CALL_EXPR_ARG (exp, 0);
25808 op0 = expand_normal (arg0);
25809 icode = CODE_FOR_sse2_clflush;
25810 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25811 op0 = copy_to_mode_reg (Pmode, op0);
25812
25813 emit_insn (gen_sse2_clflush (op0));
25814 return 0;
25815
25816 case IX86_BUILTIN_MONITOR:
25817 arg0 = CALL_EXPR_ARG (exp, 0);
25818 arg1 = CALL_EXPR_ARG (exp, 1);
25819 arg2 = CALL_EXPR_ARG (exp, 2);
25820 op0 = expand_normal (arg0);
25821 op1 = expand_normal (arg1);
25822 op2 = expand_normal (arg2);
25823 if (!REG_P (op0))
25824 op0 = copy_to_mode_reg (Pmode, op0);
25825 if (!REG_P (op1))
25826 op1 = copy_to_mode_reg (SImode, op1);
25827 if (!REG_P (op2))
25828 op2 = copy_to_mode_reg (SImode, op2);
25829 emit_insn (ix86_gen_monitor (op0, op1, op2));
25830 return 0;
25831
25832 case IX86_BUILTIN_MWAIT:
25833 arg0 = CALL_EXPR_ARG (exp, 0);
25834 arg1 = CALL_EXPR_ARG (exp, 1);
25835 op0 = expand_normal (arg0);
25836 op1 = expand_normal (arg1);
25837 if (!REG_P (op0))
25838 op0 = copy_to_mode_reg (SImode, op0);
25839 if (!REG_P (op1))
25840 op1 = copy_to_mode_reg (SImode, op1);
25841 emit_insn (gen_sse3_mwait (op0, op1));
25842 return 0;
25843
25844 case IX86_BUILTIN_VEC_INIT_V2SI:
25845 case IX86_BUILTIN_VEC_INIT_V4HI:
25846 case IX86_BUILTIN_VEC_INIT_V8QI:
25847 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25848
25849 case IX86_BUILTIN_VEC_EXT_V2DF:
25850 case IX86_BUILTIN_VEC_EXT_V2DI:
25851 case IX86_BUILTIN_VEC_EXT_V4SF:
25852 case IX86_BUILTIN_VEC_EXT_V4SI:
25853 case IX86_BUILTIN_VEC_EXT_V8HI:
25854 case IX86_BUILTIN_VEC_EXT_V2SI:
25855 case IX86_BUILTIN_VEC_EXT_V4HI:
25856 case IX86_BUILTIN_VEC_EXT_V16QI:
25857 return ix86_expand_vec_ext_builtin (exp, target);
25858
25859 case IX86_BUILTIN_VEC_SET_V2DI:
25860 case IX86_BUILTIN_VEC_SET_V4SF:
25861 case IX86_BUILTIN_VEC_SET_V4SI:
25862 case IX86_BUILTIN_VEC_SET_V8HI:
25863 case IX86_BUILTIN_VEC_SET_V4HI:
25864 case IX86_BUILTIN_VEC_SET_V16QI:
25865 return ix86_expand_vec_set_builtin (exp);
25866
25867 case IX86_BUILTIN_VEC_PERM_V2DF:
25868 case IX86_BUILTIN_VEC_PERM_V4SF:
25869 case IX86_BUILTIN_VEC_PERM_V2DI:
25870 case IX86_BUILTIN_VEC_PERM_V4SI:
25871 case IX86_BUILTIN_VEC_PERM_V8HI:
25872 case IX86_BUILTIN_VEC_PERM_V16QI:
25873 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25874 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25875 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25876 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25877 case IX86_BUILTIN_VEC_PERM_V4DF:
25878 case IX86_BUILTIN_VEC_PERM_V8SF:
25879 return ix86_expand_vec_perm_builtin (exp);
25880
25881 case IX86_BUILTIN_INFQ:
25882 case IX86_BUILTIN_HUGE_VALQ:
25883 {
25884 REAL_VALUE_TYPE inf;
25885 rtx tmp;
25886
25887 real_inf (&inf);
25888 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25889
25890 tmp = validize_mem (force_const_mem (mode, tmp));
25891
25892 if (target == 0)
25893 target = gen_reg_rtx (mode);
25894
25895 emit_move_insn (target, tmp);
25896 return target;
25897 }
25898
25899 case IX86_BUILTIN_LLWPCB:
25900 arg0 = CALL_EXPR_ARG (exp, 0);
25901 op0 = expand_normal (arg0);
25902 icode = CODE_FOR_lwp_llwpcb;
25903 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25904 op0 = copy_to_mode_reg (Pmode, op0);
25905 emit_insn (gen_lwp_llwpcb (op0));
25906 return 0;
25907
25908 case IX86_BUILTIN_SLWPCB:
25909 icode = CODE_FOR_lwp_slwpcb;
25910 if (!target
25911 || !insn_data[icode].operand[0].predicate (target, Pmode))
25912 target = gen_reg_rtx (Pmode);
25913 emit_insn (gen_lwp_slwpcb (target));
25914 return target;
25915
25916 default:
25917 break;
25918 }
25919
25920 for (i = 0, d = bdesc_special_args;
25921 i < ARRAY_SIZE (bdesc_special_args);
25922 i++, d++)
25923 if (d->code == fcode)
25924 return ix86_expand_special_args_builtin (d, exp, target);
25925
25926 for (i = 0, d = bdesc_args;
25927 i < ARRAY_SIZE (bdesc_args);
25928 i++, d++)
25929 if (d->code == fcode)
25930 switch (fcode)
25931 {
25932 case IX86_BUILTIN_FABSQ:
25933 case IX86_BUILTIN_COPYSIGNQ:
25934 if (!TARGET_SSE2)
25935 /* Emit a normal call if SSE2 isn't available. */
25936 return expand_call (exp, target, ignore);
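/* FALLTHRU */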
25937 default:
25938 return ix86_expand_args_builtin (d, exp, target);
25939 }
25940
25941 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25942 if (d->code == fcode)
25943 return ix86_expand_sse_comi (d, exp, target);
25944
25945 for (i = 0, d = bdesc_pcmpestr;
25946 i < ARRAY_SIZE (bdesc_pcmpestr);
25947 i++, d++)
25948 if (d->code == fcode)
25949 return ix86_expand_sse_pcmpestr (d, exp, target);
25950
25951 for (i = 0, d = bdesc_pcmpistr;
25952 i < ARRAY_SIZE (bdesc_pcmpistr);
25953 i++, d++)
25954 if (d->code == fcode)
25955 return ix86_expand_sse_pcmpistr (d, exp, target);
25956
25957 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25958 if (d->code == fcode)
25959 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25960 (enum ix86_builtin_func_type)
25961 d->flag, d->comparison);
25962
25963 gcc_unreachable ();
25964 }
25965
25966 /* Returns a function decl for a vectorized version of the builtin function
25967 FNDECL, with output vector type TYPE_OUT and input vector type TYPE_IN,
25968 or NULL_TREE if a vectorized version is not available. */
25969
25970 static tree
25971 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
25972 tree type_in)
25973 {
25974 enum machine_mode in_mode, out_mode;
25975 int in_n, out_n;
25976 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
25977
25978 if (TREE_CODE (type_out) != VECTOR_TYPE
25979 || TREE_CODE (type_in) != VECTOR_TYPE
25980 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
25981 return NULL_TREE;
25982
25983 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25984 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25985 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25986 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25987
25988 switch (fn)
25989 {
25990 case BUILT_IN_SQRT:
25991 if (out_mode == DFmode && out_n == 2
25992 && in_mode == DFmode && in_n == 2)
25993 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25994 break;
25995
25996 case BUILT_IN_SQRTF:
25997 if (out_mode == SFmode && out_n == 4
25998 && in_mode == SFmode && in_n == 4)
25999 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
26000 break;
26001
26002 case BUILT_IN_LRINT:
26003 if (out_mode == SImode && out_n == 4
26004 && in_mode == DFmode && in_n == 2)
26005 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
26006 break;
26007
26008 case BUILT_IN_LRINTF:
26009 if (out_mode == SImode && out_n == 4
26010 && in_mode == SFmode && in_n == 4)
26011 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
26012 break;
26013
26014 case BUILT_IN_COPYSIGN:
26015 if (out_mode == DFmode && out_n == 2
26016 && in_mode == DFmode && in_n == 2)
26017 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
26018 break;
26019
26020 case BUILT_IN_COPYSIGNF:
26021 if (out_mode == SFmode && out_n == 4
26022 && in_mode == SFmode && in_n == 4)
26023 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
26024 break;
26025
26026 default:
26027 ;
26028 }
26029
26030 /* Dispatch to a handler for a vectorization library. */
26031 if (ix86_veclib_handler)
26032 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
26033 type_in);
26034
26035 return NULL_TREE;
26036 }
26037
26038 /* Handler for an SVML-style interface to
26039 a library with vectorized intrinsics. */
26040
26041 static tree
26042 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
26043 {
26044 char name[20];
26045 tree fntype, new_fndecl, args;
26046 unsigned arity;
26047 const char *bname;
26048 enum machine_mode el_mode, in_mode;
26049 int n, in_n;
26050
26051 /* The SVML library is suitable for unsafe math only. */
26052 if (!flag_unsafe_math_optimizations)
26053 return NULL_TREE;
26054
26055 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26056 n = TYPE_VECTOR_SUBPARTS (type_out);
26057 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26058 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26059 if (el_mode != in_mode
26060 || n != in_n)
26061 return NULL_TREE;
26062
26063 switch (fn)
26064 {
26065 case BUILT_IN_EXP:
26066 case BUILT_IN_LOG:
26067 case BUILT_IN_LOG10:
26068 case BUILT_IN_POW:
26069 case BUILT_IN_TANH:
26070 case BUILT_IN_TAN:
26071 case BUILT_IN_ATAN:
26072 case BUILT_IN_ATAN2:
26073 case BUILT_IN_ATANH:
26074 case BUILT_IN_CBRT:
26075 case BUILT_IN_SINH:
26076 case BUILT_IN_SIN:
26077 case BUILT_IN_ASINH:
26078 case BUILT_IN_ASIN:
26079 case BUILT_IN_COSH:
26080 case BUILT_IN_COS:
26081 case BUILT_IN_ACOSH:
26082 case BUILT_IN_ACOS:
26083 if (el_mode != DFmode || n != 2)
26084 return NULL_TREE;
26085 break;
26086
26087 case BUILT_IN_EXPF:
26088 case BUILT_IN_LOGF:
26089 case BUILT_IN_LOG10F:
26090 case BUILT_IN_POWF:
26091 case BUILT_IN_TANHF:
26092 case BUILT_IN_TANF:
26093 case BUILT_IN_ATANF:
26094 case BUILT_IN_ATAN2F:
26095 case BUILT_IN_ATANHF:
26096 case BUILT_IN_CBRTF:
26097 case BUILT_IN_SINHF:
26098 case BUILT_IN_SINF:
26099 case BUILT_IN_ASINHF:
26100 case BUILT_IN_ASINF:
26101 case BUILT_IN_COSHF:
26102 case BUILT_IN_COSF:
26103 case BUILT_IN_ACOSHF:
26104 case BUILT_IN_ACOSF:
26105 if (el_mode != SFmode || n != 4)
26106 return NULL_TREE;
26107 break;
26108
26109 default:
26110 return NULL_TREE;
26111 }
26112
26113 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26114
26115 if (fn == BUILT_IN_LOGF)
26116 strcpy (name, "vmlsLn4");
26117 else if (fn == BUILT_IN_LOG)
26118 strcpy (name, "vmldLn2");
26119 else if (n == 4)
26120 {
26121 sprintf (name, "vmls%s", bname+10);
26122 name[strlen (name)-1] = '4';
26123 }
26124 else
26125 sprintf (name, "vmld%s2", bname+10);
26126
26127 /* Convert to uppercase. */
26128 name[4] &= ~0x20;
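/* Worked example: BUILT_IN_SINF ("__builtin_sinf", n == 4) yields
"vmlsSin4", and BUILT_IN_SIN (n == 2) yields "vmldSin2".  */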
26129
26130 arity = 0;
26131 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26132 args = TREE_CHAIN (args))
26133 arity++;
26134
26135 if (arity == 1)
26136 fntype = build_function_type_list (type_out, type_in, NULL);
26137 else
26138 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26139
26140 /* Build a function declaration for the vectorized function. */
26141 new_fndecl = build_decl (BUILTINS_LOCATION,
26142 FUNCTION_DECL, get_identifier (name), fntype);
26143 TREE_PUBLIC (new_fndecl) = 1;
26144 DECL_EXTERNAL (new_fndecl) = 1;
26145 DECL_IS_NOVOPS (new_fndecl) = 1;
26146 TREE_READONLY (new_fndecl) = 1;
26147
26148 return new_fndecl;
26149 }
26150
26151 /* Handler for an ACML-style interface to
26152 a library with vectorized intrinsics. */
26153
26154 static tree
26155 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
26156 {
26157 char name[20] = "__vr.._";
26158 tree fntype, new_fndecl, args;
26159 unsigned arity;
26160 const char *bname;
26161 enum machine_mode el_mode, in_mode;
26162 int n, in_n;
26163
26164 /* The ACML library is 64-bit only and suitable for unsafe math only,
26165 as it does not correctly support parts of IEEE (such as denormals)
26166 with the required precision. */
26167 if (!TARGET_64BIT
26168 || !flag_unsafe_math_optimizations)
26169 return NULL_TREE;
26170
26171 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26172 n = TYPE_VECTOR_SUBPARTS (type_out);
26173 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26174 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26175 if (el_mode != in_mode
26176 || n != in_n)
26177 return NULL_TREE;
26178
26179 switch (fn)
26180 {
26181 case BUILT_IN_SIN:
26182 case BUILT_IN_COS:
26183 case BUILT_IN_EXP:
26184 case BUILT_IN_LOG:
26185 case BUILT_IN_LOG2:
26186 case BUILT_IN_LOG10:
26187 name[4] = 'd';
26188 name[5] = '2';
26189 if (el_mode != DFmode
26190 || n != 2)
26191 return NULL_TREE;
26192 break;
26193
26194 case BUILT_IN_SINF:
26195 case BUILT_IN_COSF:
26196 case BUILT_IN_EXPF:
26197 case BUILT_IN_POWF:
26198 case BUILT_IN_LOGF:
26199 case BUILT_IN_LOG2F:
26200 case BUILT_IN_LOG10F:
26201 name[4] = 's';
26202 name[5] = '4';
26203 if (el_mode != SFmode
26204 || n != 4)
26205 return NULL_TREE;
26206 break;
26207
26208 default:
26209 return NULL_TREE;
26210 }
26211
26212 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26213 sprintf (name + 7, "%s", bname+10);
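/* Worked example: BUILT_IN_SIN yields "__vrd2_sin" and BUILT_IN_SINF
yields "__vrs4_sinf".  */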
26214
26215 arity = 0;
26216 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26217 args = TREE_CHAIN (args))
26218 arity++;
26219
26220 if (arity == 1)
26221 fntype = build_function_type_list (type_out, type_in, NULL);
26222 else
26223 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26224
26225 /* Build a function declaration for the vectorized function. */
26226 new_fndecl = build_decl (BUILTINS_LOCATION,
26227 FUNCTION_DECL, get_identifier (name), fntype);
26228 TREE_PUBLIC (new_fndecl) = 1;
26229 DECL_EXTERNAL (new_fndecl) = 1;
26230 DECL_IS_NOVOPS (new_fndecl) = 1;
26231 TREE_READONLY (new_fndecl) = 1;
26232
26233 return new_fndecl;
26234 }
26235
26236
26237 /* Returns a decl of a function that implements conversion of an integer vector
26238 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
26239 are the types involved when converting according to CODE.
26240 Return NULL_TREE if it is not available. */
26241
26242 static tree
26243 ix86_vectorize_builtin_conversion (unsigned int code,
26244 tree dest_type, tree src_type)
26245 {
26246 if (! TARGET_SSE2)
26247 return NULL_TREE;
26248
26249 switch (code)
26250 {
26251 case FLOAT_EXPR:
26252 switch (TYPE_MODE (src_type))
26253 {
26254 case V4SImode:
26255 switch (TYPE_MODE (dest_type))
26256 {
26257 case V4SFmode:
26258 return (TYPE_UNSIGNED (src_type)
26259 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
26260 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
26261 case V4DFmode:
26262 return (TYPE_UNSIGNED (src_type)
26263 ? NULL_TREE
26264 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
26265 default:
26266 return NULL_TREE;
26267 }
26268 break;
26269 case V8SImode:
26270 switch (TYPE_MODE (dest_type))
26271 {
26272 case V8SFmode:
26273 return (TYPE_UNSIGNED (src_type)
26274 ? NULL_TREE
26275 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
26276 default:
26277 return NULL_TREE;
26278 }
26279 break;
26280 default:
26281 return NULL_TREE;
26282 }
26283
26284 case FIX_TRUNC_EXPR:
26285 switch (TYPE_MODE (dest_type))
26286 {
26287 case V4SImode:
26288 switch (TYPE_MODE (src_type))
26289 {
26290 case V4SFmode:
26291 return (TYPE_UNSIGNED (dest_type)
26292 ? NULL_TREE
26293 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
26294 case V4DFmode:
26295 return (TYPE_UNSIGNED (dest_type)
26296 ? NULL_TREE
26297 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
26298 default:
26299 return NULL_TREE;
26300 }
26301 break;
26302
26303 case V8SImode:
26304 switch (TYPE_MODE (src_type))
26305 {
26306 case V8SFmode:
26307 return (TYPE_UNSIGNED (dest_type)
26308 ? NULL_TREE
26309 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
26310 default:
26311 return NULL_TREE;
26312 }
26313 break;
26314
26315 default:
26316 return NULL_TREE;
26317 }
26318
26319 default:
26320 return NULL_TREE;
26321 }
26322
26323 return NULL_TREE;
26324 }
26325
26326 /* Returns a decl of a target-specific builtin that implements the
26327 reciprocal of the function FN, or NULL_TREE if it is not available. */
26328
26329 static tree
26330 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
26331 bool sqrt ATTRIBUTE_UNUSED)
26332 {
26333 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
26334 && flag_finite_math_only && !flag_trapping_math
26335 && flag_unsafe_math_optimizations))
26336 return NULL_TREE;
26337
26338 if (md_fn)
26339 /* Machine dependent builtins. */
26340 switch (fn)
26341 {
26342 /* Vectorized version of sqrt to rsqrt conversion. */
26343 case IX86_BUILTIN_SQRTPS_NR:
26344 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
26345
26346 default:
26347 return NULL_TREE;
26348 }
26349 else
26350 /* Normal builtins. */
26351 switch (fn)
26352 {
26353 /* Sqrt to rsqrt conversion. */
26354 case BUILT_IN_SQRTF:
26355 return ix86_builtins[IX86_BUILTIN_RSQRTF];
26356
26357 default:
26358 return NULL_TREE;
26359 }
26360 }
26361 \f
26362 /* Helper for avx_vpermilps256_operand et al. This is also used by
26363 the expansion functions to turn the parallel back into a mask.
26364 The return value is 0 for no match and the imm8+1 for a match. */
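/* Worked example: a V4SFmode parallel selecting elements {2,3,0,1}
encodes as imm8 0x4e (2 | 3<<2 | 0<<4 | 1<<6), so the return value
is 0x4f.  */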
26365
26366 int
26367 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
26368 {
26369 unsigned i, nelt = GET_MODE_NUNITS (mode);
26370 unsigned mask = 0;
26371 unsigned char ipar[8];
26372
26373 if (XVECLEN (par, 0) != (int) nelt)
26374 return 0;
26375
26376 /* Validate that all of the elements are constants, and not totally
26377 out of range. Copy the data into an integral array to make the
26378 subsequent checks easier. */
26379 for (i = 0; i < nelt; ++i)
26380 {
26381 rtx er = XVECEXP (par, 0, i);
26382 unsigned HOST_WIDE_INT ei;
26383
26384 if (!CONST_INT_P (er))
26385 return 0;
26386 ei = INTVAL (er);
26387 if (ei >= nelt)
26388 return 0;
26389 ipar[i] = ei;
26390 }
26391
26392 switch (mode)
26393 {
26394 case V4DFmode:
26395 /* In the 256-bit DFmode case, we can only move elements within
26396 a 128-bit lane. */
26397 for (i = 0; i < 2; ++i)
26398 {
26399 if (ipar[i] >= 2)
26400 return 0;
26401 mask |= ipar[i] << i;
26402 }
26403 for (i = 2; i < 4; ++i)
26404 {
26405 if (ipar[i] < 2)
26406 return 0;
26407 mask |= (ipar[i] - 2) << i;
26408 }
26409 break;
26410
26411 case V8SFmode:
26412 /* In the 256-bit SFmode case, we have full freedom of movement
26413 within the low 128-bit lane, but the high 128-bit lane must
26414 mirror the exact same pattern. */
26415 for (i = 0; i < 4; ++i)
26416 if (ipar[i] + 4 != ipar[i + 4])
26417 return 0;
26418 nelt = 4;
26419 /* FALLTHRU */
26420
26421 case V2DFmode:
26422 case V4SFmode:
26423 /* In the 128-bit case, we have full freedom in the placement of
26424 the elements from the source operand. */
26425 for (i = 0; i < nelt; ++i)
26426 mask |= ipar[i] << (i * (nelt / 2));
26427 break;
26428
26429 default:
26430 gcc_unreachable ();
26431 }
26432
26433 /* Make sure success has a non-zero value by adding one. */
26434 return mask + 1;
26435 }
26436
26437 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
26438 the expansion functions to turn the parallel back into a mask.
26439 The return value is 0 for no match and the imm8+1 for a match. */
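/* Worked example: a V8SFmode parallel {0,1,2,3,8,9,10,11}, i.e. the low
half of each operand, encodes as imm8 0x20, so the return value is 0x21.  */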
26440
26441 int
26442 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
26443 {
26444 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
26445 unsigned mask = 0;
26446 unsigned char ipar[8];
26447
26448 if (XVECLEN (par, 0) != (int) nelt)
26449 return 0;
26450
26451 /* Validate that all of the elements are constants, and not totally
26452 out of range. Copy the data into an integral array to make the
26453 subsequent checks easier. */
26454 for (i = 0; i < nelt; ++i)
26455 {
26456 rtx er = XVECEXP (par, 0, i);
26457 unsigned HOST_WIDE_INT ei;
26458
26459 if (!CONST_INT_P (er))
26460 return 0;
26461 ei = INTVAL (er);
26462 if (ei >= 2 * nelt)
26463 return 0;
26464 ipar[i] = ei;
26465 }
26466
26467 /* Validate that each half of the permute selects consecutive elements. */
26468 for (i = 0; i < nelt2 - 1; ++i)
26469 if (ipar[i] + 1 != ipar[i + 1])
26470 return 0;
26471 for (i = nelt2; i < nelt - 1; ++i)
26472 if (ipar[i] + 1 != ipar[i + 1])
26473 return 0;
26474
26475 /* Reconstruct the mask. */
26476 for (i = 0; i < 2; ++i)
26477 {
26478 unsigned e = ipar[i * nelt2];
26479 if (e % nelt2)
26480 return 0;
26481 e /= nelt2;
26482 mask |= e << (i * 4);
26483 }
26484
26485 /* Make sure success has a non-zero value by adding one. */
26486 return mask + 1;
26487 }
26488 \f
26489
26490 /* Store OPERAND to memory after reload is completed. This means
26491 that we can't easily use assign_stack_local. */
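/* A sketch of the intended use (the real callers live elsewhere, e.g. in
   the machine description), pairing this with ix86_free_from_memory below:

     mem = ix86_force_to_memory (DImode, op);
     ... emit insns that read MEM ...
     ix86_free_from_memory (DImode);  */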
26492 rtx
26493 ix86_force_to_memory (enum machine_mode mode, rtx operand)
26494 {
26495 rtx result;
26496
26497 gcc_assert (reload_completed);
26498 if (ix86_using_red_zone ())
26499 {
26500 result = gen_rtx_MEM (mode,
26501 gen_rtx_PLUS (Pmode,
26502 stack_pointer_rtx,
26503 GEN_INT (-RED_ZONE_SIZE)));
26504 emit_move_insn (result, operand);
26505 }
26506 else if (TARGET_64BIT)
26507 {
26508 switch (mode)
26509 {
26510 case HImode:
26511 case SImode:
26512 operand = gen_lowpart (DImode, operand);
26513 /* FALLTHRU */
26514 case DImode:
26515 emit_insn (
26516 gen_rtx_SET (VOIDmode,
26517 gen_rtx_MEM (DImode,
26518 gen_rtx_PRE_DEC (DImode,
26519 stack_pointer_rtx)),
26520 operand));
26521 break;
26522 default:
26523 gcc_unreachable ();
26524 }
26525 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26526 }
26527 else
26528 {
26529 switch (mode)
26530 {
26531 case DImode:
26532 {
26533 rtx operands[2];
26534 split_double_mode (mode, &operand, 1, operands, operands + 1);
26535 emit_insn (
26536 gen_rtx_SET (VOIDmode,
26537 gen_rtx_MEM (SImode,
26538 gen_rtx_PRE_DEC (Pmode,
26539 stack_pointer_rtx)),
26540 operands[1]));
26541 emit_insn (
26542 gen_rtx_SET (VOIDmode,
26543 gen_rtx_MEM (SImode,
26544 gen_rtx_PRE_DEC (Pmode,
26545 stack_pointer_rtx)),
26546 operands[0]));
26547 }
26548 break;
26549 case HImode:
26550 /* Store HImodes as SImodes. */
26551 operand = gen_lowpart (SImode, operand);
26552 /* FALLTHRU */
26553 case SImode:
26554 emit_insn (
26555 gen_rtx_SET (VOIDmode,
26556 gen_rtx_MEM (GET_MODE (operand),
26557 gen_rtx_PRE_DEC (SImode,
26558 stack_pointer_rtx)),
26559 operand));
26560 break;
26561 default:
26562 gcc_unreachable ();
26563 }
26564 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26565 }
26566 return result;
26567 }
26568
26569 /* Free the operand from memory. */
26570 void
26571 ix86_free_from_memory (enum machine_mode mode)
26572 {
26573 if (!ix86_using_red_zone ())
26574 {
26575 int size;
26576
26577 if (mode == DImode || TARGET_64BIT)
26578 size = 8;
26579 else
26580 size = 4;
26581 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26582 to a pop or add instruction if registers are available. */
26583 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26584 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26585 GEN_INT (size))));
26586 }
26587 }
26588
26589 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
26590 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
26591 same. */
26592 static const reg_class_t *
26593 i386_ira_cover_classes (void)
26594 {
26595 static const reg_class_t sse_fpmath_classes[] = {
26596 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
26597 };
26598 static const reg_class_t no_sse_fpmath_classes[] = {
26599 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
26600 };
26601
26602 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
26603 }
26604
26605 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26606 QImode must go into class Q_REGS.
26607 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
26608 movdf to do mem-to-mem moves through integer regs. */
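/* For example, loading (const_double:SF 1.0) into an SSE class returns
   NO_REGS below, forcing the constant into the constant pool, while with
   x87 math the same constant in a FLOAT_REGS class is kept there, since
   fld1 can materialize it directly.  */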
26609 enum reg_class
26610 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26611 {
26612 enum machine_mode mode = GET_MODE (x);
26613
26614 /* We're only allowed to return a subclass of CLASS. Many of the
26615 following checks fail for NO_REGS, so eliminate that early. */
26616 if (regclass == NO_REGS)
26617 return NO_REGS;
26618
26619 /* All classes can load zeros. */
26620 if (x == CONST0_RTX (mode))
26621 return regclass;
26622
26623 /* Force constants into memory if we are loading a (nonzero) constant into
26624 an MMX or SSE register. This is because there are no MMX/SSE instructions
26625 to load from a constant. */
26626 if (CONSTANT_P (x)
26627 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26628 return NO_REGS;
26629
26630 /* Prefer SSE regs only, if we can use them for math. */
26631 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26632 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26633
26634 /* Floating-point constants need more complex checks. */
26635 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26636 {
26637 /* General regs can load everything. */
26638 if (reg_class_subset_p (regclass, GENERAL_REGS))
26639 return regclass;
26640
26641 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26642 zero above. We only want to wind up preferring 80387 registers if
26643 we plan on doing computation with them. */
26644 if (TARGET_80387
26645 && standard_80387_constant_p (x))
26646 {
26647 /* Limit class to non-sse. */
26648 if (regclass == FLOAT_SSE_REGS)
26649 return FLOAT_REGS;
26650 if (regclass == FP_TOP_SSE_REGS)
26651 return FP_TOP_REG;
26652 if (regclass == FP_SECOND_SSE_REGS)
26653 return FP_SECOND_REG;
26654 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26655 return regclass;
26656 }
26657
26658 return NO_REGS;
26659 }
26660
26661 /* Generally when we see PLUS here, it's the function invariant
26662 (plus soft-fp const_int), which can only be computed into general
26663 regs. */
26664 if (GET_CODE (x) == PLUS)
26665 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26666
26667 /* QImode constants are easy to load, but non-constant QImode data
26668 must go into Q_REGS. */
26669 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26670 {
26671 if (reg_class_subset_p (regclass, Q_REGS))
26672 return regclass;
26673 if (reg_class_subset_p (Q_REGS, regclass))
26674 return Q_REGS;
26675 return NO_REGS;
26676 }
26677
26678 return regclass;
26679 }
26680
26681 /* Discourage putting floating-point values in SSE registers unless
26682 SSE math is being used, and likewise for the 387 registers. */
26683 enum reg_class
26684 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26685 {
26686 enum machine_mode mode = GET_MODE (x);
26687
26688 /* Restrict the output reload class to the register bank that we are doing
26689 math on. If we would like not to return a subset of CLASS, reject this
26690 alternative: if reload cannot do this, it will still use its choice. */
26691 mode = GET_MODE (x);
26692 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26693 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26694
26695 if (X87_FLOAT_MODE_P (mode))
26696 {
26697 if (regclass == FP_TOP_SSE_REGS)
26698 return FP_TOP_REG;
26699 else if (regclass == FP_SECOND_SSE_REGS)
26700 return FP_SECOND_REG;
26701 else
26702 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
26703 }
26704
26705 return regclass;
26706 }
26707
26708 static reg_class_t
26709 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
26710 enum machine_mode mode,
26711 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26712 {
26713 /* QImode spills from non-QI registers require an
26714 intermediate register on 32-bit targets. */
26715 if (!in_p && mode == QImode && !TARGET_64BIT
26716 && (rclass == GENERAL_REGS
26717 || rclass == LEGACY_REGS
26718 || rclass == INDEX_REGS))
26719 {
26720 int regno;
26721
26722 if (REG_P (x))
26723 regno = REGNO (x);
26724 else
26725 regno = -1;
26726
26727 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26728 regno = true_regnum (x);
26729
26730 /* Return Q_REGS if the operand is in memory. */
26731 if (regno == -1)
26732 return Q_REGS;
26733 }
26734
26735 return NO_REGS;
26736 }
26737
26738 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
26739
26740 static bool
26741 ix86_class_likely_spilled_p (reg_class_t rclass)
26742 {
26743 switch (rclass)
26744 {
26745 case AREG:
26746 case DREG:
26747 case CREG:
26748 case BREG:
26749 case AD_REGS:
26750 case SIREG:
26751 case DIREG:
26752 case SSE_FIRST_REG:
26753 case FP_TOP_REG:
26754 case FP_SECOND_REG:
26755 return true;
26756
26757 default:
26758 break;
26759 }
26760
26761 return false;
26762 }
26763
26764 /* If we are copying between general and FP registers, we need a memory
26765 location. The same is true for SSE and MMX registers.
26766
26767 To optimize register_move_cost performance, allow inline variant.
26768
26769 The function can't work reliably when one of the CLASSES is a class containing
26770 registers from multiple units (SSE, MMX, integer). We avoid this by never
26771 combining those units in a single alternative in the machine description.
26772 Ensure that this constraint holds to avoid unexpected surprises.
26773
26774 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26775 enforce these sanity checks. */
26776
26777 static inline bool
26778 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26779 enum machine_mode mode, int strict)
26780 {
26781 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26782 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26783 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26784 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26785 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26786 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26787 {
26788 gcc_assert (!strict);
26789 return true;
26790 }
26791
26792 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26793 return true;
26794
26795 /* ??? This is a lie. We do have moves between mmx/general, and between
26796 mmx/sse2. But by saying we need secondary memory we discourage the
26797 register allocator from using the mmx registers unless needed. */
26798 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26799 return true;
26800
26801 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26802 {
26803 /* SSE1 doesn't have any direct moves from other classes. */
26804 if (!TARGET_SSE2)
26805 return true;
26806
26807 /* If the target says that inter-unit moves are more expensive
26808 than moving through memory, then don't generate them. */
26809 if (!TARGET_INTER_UNIT_MOVES)
26810 return true;
26811
26812 /* Between SSE and general, we have moves no larger than word size. */
26813 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26814 return true;
26815 }
26816
26817 return false;
26818 }
26819
26820 bool
26821 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26822 enum machine_mode mode, int strict)
26823 {
26824 return inline_secondary_memory_needed (class1, class2, mode, strict);
26825 }
26826
26827 /* Return true if the registers in CLASS cannot represent the change from
26828 modes FROM to TO. */
26829
26830 bool
26831 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26832 enum reg_class regclass)
26833 {
26834 if (from == to)
26835 return false;
26836
26837 /* x87 registers can't do subreg at all, as all values are reformatted
26838 to extended precision. */
26839 if (MAYBE_FLOAT_CLASS_P (regclass))
26840 return true;
26841
26842 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26843 {
26844 /* Vector registers do not support QI or HImode loads. If we don't
26845 disallow a change to these modes, reload will assume it's ok to
26846 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26847 the vec_dupv4hi pattern. */
26848 if (GET_MODE_SIZE (from) < 4)
26849 return true;
26850
26851 /* Vector registers do not support subreg with nonzero offsets, which
26852 are otherwise valid for integer registers. Since we can't see
26853 whether we have a nonzero offset from here, prohibit all
26854 nonparadoxical subregs changing size. */
26855 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26856 return true;
26857 }
26858
26859 return false;
26860 }
26861
26862 /* Return the cost of moving data of mode M between a
26863 register and memory. A value of 2 is the default; this cost is
26864 relative to those in `REGISTER_MOVE_COST'.
26865
26866 This function is used extensively by register_move_cost that is used to
26867 build tables at startup. Make it inline in this case.
26868 When IN is 2, return maximum of in and out move cost.
26869
26870 If moving between registers and memory is more expensive than
26871 between two registers, you should define this macro to express the
26872 relative cost.
26873
26874 Also model the increased cost of moving QImode values held in
26875 classes other than Q_REGS.
26876 */
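/* IN is 0 for a store, 1 for a load, and 2 to request the maximum of the
   load and store costs (used when costing a secondary-memory round trip
   in ix86_register_move_cost below).  */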
26877 static inline int
26878 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26879 int in)
26880 {
26881 int cost;
26882 if (FLOAT_CLASS_P (regclass))
26883 {
26884 int index;
26885 switch (mode)
26886 {
26887 case SFmode:
26888 index = 0;
26889 break;
26890 case DFmode:
26891 index = 1;
26892 break;
26893 case XFmode:
26894 index = 2;
26895 break;
26896 default:
26897 return 100;
26898 }
26899 if (in == 2)
26900 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26901 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26902 }
26903 if (SSE_CLASS_P (regclass))
26904 {
26905 int index;
26906 switch (GET_MODE_SIZE (mode))
26907 {
26908 case 4:
26909 index = 0;
26910 break;
26911 case 8:
26912 index = 1;
26913 break;
26914 case 16:
26915 index = 2;
26916 break;
26917 default:
26918 return 100;
26919 }
26920 if (in == 2)
26921 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26922 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26923 }
26924 if (MMX_CLASS_P (regclass))
26925 {
26926 int index;
26927 switch (GET_MODE_SIZE (mode))
26928 {
26929 case 4:
26930 index = 0;
26931 break;
26932 case 8:
26933 index = 1;
26934 break;
26935 default:
26936 return 100;
26937 }
26938 if (in == 2)
26939 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26940 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26941 }
26942 switch (GET_MODE_SIZE (mode))
26943 {
26944 case 1:
26945 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26946 {
26947 if (!in)
26948 return ix86_cost->int_store[0];
26949 if (TARGET_PARTIAL_REG_DEPENDENCY
26950 && optimize_function_for_speed_p (cfun))
26951 cost = ix86_cost->movzbl_load;
26952 else
26953 cost = ix86_cost->int_load[0];
26954 if (in == 2)
26955 return MAX (cost, ix86_cost->int_store[0]);
26956 return cost;
26957 }
26958 else
26959 {
26960 if (in == 2)
26961 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26962 if (in)
26963 return ix86_cost->movzbl_load;
26964 else
26965 return ix86_cost->int_store[0] + 4;
26966 }
26967 break;
26968 case 2:
26969 if (in == 2)
26970 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26971 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26972 default:
26973 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26974 if (mode == TFmode)
26975 mode = XFmode;
26976 if (in == 2)
26977 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26978 else if (in)
26979 cost = ix86_cost->int_load[2];
26980 else
26981 cost = ix86_cost->int_store[2];
26982 return (cost * (((int) GET_MODE_SIZE (mode)
26983 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
26984 }
26985 }
26986
26987 static int
26988 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
26989 bool in)
26990 {
26991 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
26992 }
26993
26994
26995 /* Return the cost of moving data from a register in class CLASS1 to
26996 one in class CLASS2.
26997
26998 It is not required that the cost always equal 2 when FROM is the same as TO;
26999 on some machines it is expensive to move between registers if they are not
27000 general registers. */
27001
27002 static int
27003 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
27004 reg_class_t class2_i)
27005 {
27006 enum reg_class class1 = (enum reg_class) class1_i;
27007 enum reg_class class2 = (enum reg_class) class2_i;
27008
27009 /* If we require secondary memory, compute the cost of the store followed
27010 by the load. In order to avoid bad register allocation choices, we need
27011 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
27012
27013 if (inline_secondary_memory_needed (class1, class2, mode, 0))
27014 {
27015 int cost = 1;
27016
27017 cost += inline_memory_move_cost (mode, class1, 2);
27018 cost += inline_memory_move_cost (mode, class2, 2);
27019
27020 /* When copying from a general purpose register we may emit multiple
27021 stores followed by a single load, causing a memory size mismatch stall.
27022 Count this as an arbitrarily high cost of 20. */
27023 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
27024 cost += 20;
27025
27026 /* In the case of FP/MMX moves, the registers actually overlap, and we
27027 have to switch modes in order to treat them differently. */
27028 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
27029 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
27030 cost += 20;
27031
27032 return cost;
27033 }
27034
27035 /* Moves between SSE/MMX and integer unit are expensive. */
27036 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
27037 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
27038
27039 /* ??? By keeping the returned value relatively high, we limit the number
27040 of moves between integer and MMX/SSE registers for all targets.
27041 Additionally, a high value prevents a problem with x86_modes_tieable_p (),
27042 where integer modes in MMX/SSE registers are not tieable
27043 because of missing QImode and HImode moves to, from or between
27044 MMX/SSE registers. */
27045 return MAX (8, ix86_cost->mmxsse_to_integer);
27046
27047 if (MAYBE_FLOAT_CLASS_P (class1))
27048 return ix86_cost->fp_move;
27049 if (MAYBE_SSE_CLASS_P (class1))
27050 return ix86_cost->sse_move;
27051 if (MAYBE_MMX_CLASS_P (class1))
27052 return ix86_cost->mmx_move;
27053 return 2;
27054 }
27055
27056 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
27057
27058 bool
27059 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
27060 {
27061 /* Only the flags registers can hold CCmode values, and nothing else. */
27062 if (CC_REGNO_P (regno))
27063 return GET_MODE_CLASS (mode) == MODE_CC;
27064 if (GET_MODE_CLASS (mode) == MODE_CC
27065 || GET_MODE_CLASS (mode) == MODE_RANDOM
27066 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
27067 return 0;
27068 if (FP_REGNO_P (regno))
27069 return VALID_FP_MODE_P (mode);
27070 if (SSE_REGNO_P (regno))
27071 {
27072 /* We implement the move patterns for all vector modes into and
27073 out of SSE registers, even when no operation instructions
27074 are available. OImode move is available only when AVX is
27075 enabled. */
27076 return ((TARGET_AVX && mode == OImode)
27077 || VALID_AVX256_REG_MODE (mode)
27078 || VALID_SSE_REG_MODE (mode)
27079 || VALID_SSE2_REG_MODE (mode)
27080 || VALID_MMX_REG_MODE (mode)
27081 || VALID_MMX_REG_MODE_3DNOW (mode));
27082 }
27083 if (MMX_REGNO_P (regno))
27084 {
27085 /* We implement the move patterns for 3DNOW modes even in MMX mode,
27086 so if the register is available at all, then we can move data of
27087 the given mode into or out of it. */
27088 return (VALID_MMX_REG_MODE (mode)
27089 || VALID_MMX_REG_MODE_3DNOW (mode));
27090 }
27091
27092 if (mode == QImode)
27093 {
27094 /* Take care with QImode values - they can live in non-QI regs,
27095 but then they do cause partial register stalls. */
27096 if (regno <= BX_REG || TARGET_64BIT)
27097 return 1;
27098 if (!TARGET_PARTIAL_REG_STALL)
27099 return 1;
27100 return reload_in_progress || reload_completed;
27101 }
27102 /* We handle both integers and floats in the general purpose registers. */
27103 else if (VALID_INT_MODE_P (mode))
27104 return 1;
27105 else if (VALID_FP_MODE_P (mode))
27106 return 1;
27107 else if (VALID_DFP_MODE_P (mode))
27108 return 1;
27109 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
27110 on to use that value in smaller contexts, this can easily force a
27111 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
27112 supporting DImode, allow it. */
27113 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
27114 return 1;
27115
27116 return 0;
27117 }
27118
27119 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
27120 tieable integer mode. */
27121
27122 static bool
27123 ix86_tieable_integer_mode_p (enum machine_mode mode)
27124 {
27125 switch (mode)
27126 {
27127 case HImode:
27128 case SImode:
27129 return true;
27130
27131 case QImode:
27132 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
27133
27134 case DImode:
27135 return TARGET_64BIT;
27136
27137 default:
27138 return false;
27139 }
27140 }
27141
27142 /* Return true if MODE1 is accessible in a register that can hold MODE2
27143 without copying. That is, all register classes that can hold MODE2
27144 can also hold MODE1. */
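/* For example, ix86_modes_tieable_p (SFmode, DFmode) is true -- every class
   that can hold DFmode can also hold SFmode -- while the reverse query,
   ix86_modes_tieable_p (DFmode, SFmode), is false.  */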
27145
27146 bool
27147 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
27148 {
27149 if (mode1 == mode2)
27150 return true;
27151
27152 if (ix86_tieable_integer_mode_p (mode1)
27153 && ix86_tieable_integer_mode_p (mode2))
27154 return true;
27155
27156 /* MODE2 being XFmode implies fp stack or general regs, which means we
27157 can tie any smaller floating point modes to it. Note that we do not
27158 tie this with TFmode. */
27159 if (mode2 == XFmode)
27160 return mode1 == SFmode || mode1 == DFmode;
27161
27162 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
27163 that we can tie it with SFmode. */
27164 if (mode2 == DFmode)
27165 return mode1 == SFmode;
27166
27167 /* If MODE2 is only appropriate for an SSE register, then tie with
27168 any other mode acceptable to SSE registers. */
27169 if (GET_MODE_SIZE (mode2) == 16
27170 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
27171 return (GET_MODE_SIZE (mode1) == 16
27172 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
27173
27174 /* If MODE2 is appropriate for an MMX register, then tie
27175 with any other mode acceptable to MMX registers. */
27176 if (GET_MODE_SIZE (mode2) == 8
27177 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
27178 return (GET_MODE_SIZE (mode1) == 8
27179 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
27180
27181 return false;
27182 }
27183
27184 /* Compute a (partial) cost for rtx X. Return true if the complete
27185 cost has been computed, and false if subexpressions should be
27186 scanned. In either case, *TOTAL contains the cost result. */
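/* For example, for (mult:SI (reg:SI ...) (const_int 5)) the MULT case below
   counts the two bits set in 5, so *TOTAL becomes
   mult_init[MODE_INDEX (SImode)] + 2 * mult_bit plus the costs of the two
   operands, and true is returned so the operands are not scanned again.  */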
27187
27188 static bool
27189 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
27190 {
27191 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
27192 enum machine_mode mode = GET_MODE (x);
27193 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
27194
27195 switch (code)
27196 {
27197 case CONST_INT:
27198 case CONST:
27199 case LABEL_REF:
27200 case SYMBOL_REF:
27201 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
27202 *total = 3;
27203 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
27204 *total = 2;
27205 else if (flag_pic && SYMBOLIC_CONST (x)
27206 && (!TARGET_64BIT
27207 || (GET_CODE (x) != LABEL_REF
27208 && (GET_CODE (x) != SYMBOL_REF
27209 || !SYMBOL_REF_LOCAL_P (x)))))
27210 *total = 1;
27211 else
27212 *total = 0;
27213 return true;
27214
27215 case CONST_DOUBLE:
27216 if (mode == VOIDmode)
27217 *total = 0;
27218 else
27219 switch (standard_80387_constant_p (x))
27220 {
27221 case 1: /* 0.0 */
27222 *total = 1;
27223 break;
27224 default: /* Other constants */
27225 *total = 2;
27226 break;
27227 case 0:
27228 case -1:
27229 /* Start with (MEM (SYMBOL_REF)), since that's where
27230 it'll probably end up. Add a penalty for size. */
27231 *total = (COSTS_N_INSNS (1)
27232 + (flag_pic != 0 && !TARGET_64BIT)
27233 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
27234 break;
27235 }
27236 return true;
27237
27238 case ZERO_EXTEND:
27239 /* The zero extension is often completely free on x86_64, so make
27240 it as cheap as possible. */
27241 if (TARGET_64BIT && mode == DImode
27242 && GET_MODE (XEXP (x, 0)) == SImode)
27243 *total = 1;
27244 else if (TARGET_ZERO_EXTEND_WITH_AND)
27245 *total = cost->add;
27246 else
27247 *total = cost->movzx;
27248 return false;
27249
27250 case SIGN_EXTEND:
27251 *total = cost->movsx;
27252 return false;
27253
27254 case ASHIFT:
27255 if (CONST_INT_P (XEXP (x, 1))
27256 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
27257 {
27258 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
27259 if (value == 1)
27260 {
27261 *total = cost->add;
27262 return false;
27263 }
27264 if ((value == 2 || value == 3)
27265 && cost->lea <= cost->shift_const)
27266 {
27267 *total = cost->lea;
27268 return false;
27269 }
27270 }
27271 /* FALLTHRU */
27272
27273 case ROTATE:
27274 case ASHIFTRT:
27275 case LSHIFTRT:
27276 case ROTATERT:
27277 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
27278 {
27279 if (CONST_INT_P (XEXP (x, 1)))
27280 {
27281 if (INTVAL (XEXP (x, 1)) > 32)
27282 *total = cost->shift_const + COSTS_N_INSNS (2);
27283 else
27284 *total = cost->shift_const * 2;
27285 }
27286 else
27287 {
27288 if (GET_CODE (XEXP (x, 1)) == AND)
27289 *total = cost->shift_var * 2;
27290 else
27291 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
27292 }
27293 }
27294 else
27295 {
27296 if (CONST_INT_P (XEXP (x, 1)))
27297 *total = cost->shift_const;
27298 else
27299 *total = cost->shift_var;
27300 }
27301 return false;
27302
27303 case MULT:
27304 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27305 {
27306 /* ??? SSE scalar cost should be used here. */
27307 *total = cost->fmul;
27308 return false;
27309 }
27310 else if (X87_FLOAT_MODE_P (mode))
27311 {
27312 *total = cost->fmul;
27313 return false;
27314 }
27315 else if (FLOAT_MODE_P (mode))
27316 {
27317 /* ??? SSE vector cost should be used here. */
27318 *total = cost->fmul;
27319 return false;
27320 }
27321 else
27322 {
27323 rtx op0 = XEXP (x, 0);
27324 rtx op1 = XEXP (x, 1);
27325 int nbits;
27326 if (CONST_INT_P (XEXP (x, 1)))
27327 {
27328 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
27329 for (nbits = 0; value != 0; value &= value - 1)
27330 nbits++;
27331 }
27332 else
27333 /* This is arbitrary. */
27334 nbits = 7;
27335
27336 /* Compute costs correctly for widening multiplication. */
27337 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
27338 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
27339 == GET_MODE_SIZE (mode))
27340 {
27341 int is_mulwiden = 0;
27342 enum machine_mode inner_mode = GET_MODE (op0);
27343
27344 if (GET_CODE (op0) == GET_CODE (op1))
27345 is_mulwiden = 1, op1 = XEXP (op1, 0);
27346 else if (CONST_INT_P (op1))
27347 {
27348 if (GET_CODE (op0) == SIGN_EXTEND)
27349 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
27350 == INTVAL (op1);
27351 else
27352 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
27353 }
27354
27355 if (is_mulwiden)
27356 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
27357 }
27358
27359 *total = (cost->mult_init[MODE_INDEX (mode)]
27360 + nbits * cost->mult_bit
27361 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
27362
27363 return true;
27364 }
27365
27366 case DIV:
27367 case UDIV:
27368 case MOD:
27369 case UMOD:
27370 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27371 /* ??? SSE cost should be used here. */
27372 *total = cost->fdiv;
27373 else if (X87_FLOAT_MODE_P (mode))
27374 *total = cost->fdiv;
27375 else if (FLOAT_MODE_P (mode))
27376 /* ??? SSE vector cost should be used here. */
27377 *total = cost->fdiv;
27378 else
27379 *total = cost->divide[MODE_INDEX (mode)];
27380 return false;
27381
27382 case PLUS:
27383 if (GET_MODE_CLASS (mode) == MODE_INT
27384 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
27385 {
27386 if (GET_CODE (XEXP (x, 0)) == PLUS
27387 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
27388 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
27389 && CONSTANT_P (XEXP (x, 1)))
27390 {
27391 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
27392 if (val == 2 || val == 4 || val == 8)
27393 {
27394 *total = cost->lea;
27395 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27396 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
27397 outer_code, speed);
27398 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27399 return true;
27400 }
27401 }
27402 else if (GET_CODE (XEXP (x, 0)) == MULT
27403 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
27404 {
27405 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
27406 if (val == 2 || val == 4 || val == 8)
27407 {
27408 *total = cost->lea;
27409 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27410 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27411 return true;
27412 }
27413 }
27414 else if (GET_CODE (XEXP (x, 0)) == PLUS)
27415 {
27416 *total = cost->lea;
27417 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27418 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27419 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27420 return true;
27421 }
27422 }
27423 /* FALLTHRU */
27424
27425 case MINUS:
27426 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27427 {
27428 /* ??? SSE cost should be used here. */
27429 *total = cost->fadd;
27430 return false;
27431 }
27432 else if (X87_FLOAT_MODE_P (mode))
27433 {
27434 *total = cost->fadd;
27435 return false;
27436 }
27437 else if (FLOAT_MODE_P (mode))
27438 {
27439 /* ??? SSE vector cost should be used here. */
27440 *total = cost->fadd;
27441 return false;
27442 }
27443 /* FALLTHRU */
27444
27445 case AND:
27446 case IOR:
27447 case XOR:
27448 if (!TARGET_64BIT && mode == DImode)
27449 {
27450 *total = (cost->add * 2
27451 + (rtx_cost (XEXP (x, 0), outer_code, speed)
27452 << (GET_MODE (XEXP (x, 0)) != DImode))
27453 + (rtx_cost (XEXP (x, 1), outer_code, speed)
27454 << (GET_MODE (XEXP (x, 1)) != DImode)));
27455 return true;
27456 }
27457 /* FALLTHRU */
27458
27459 case NEG:
27460 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27461 {
27462 /* ??? SSE cost should be used here. */
27463 *total = cost->fchs;
27464 return false;
27465 }
27466 else if (X87_FLOAT_MODE_P (mode))
27467 {
27468 *total = cost->fchs;
27469 return false;
27470 }
27471 else if (FLOAT_MODE_P (mode))
27472 {
27473 /* ??? SSE vector cost should be used here. */
27474 *total = cost->fchs;
27475 return false;
27476 }
27477 /* FALLTHRU */
27478
27479 case NOT:
27480 if (!TARGET_64BIT && mode == DImode)
27481 *total = cost->add * 2;
27482 else
27483 *total = cost->add;
27484 return false;
27485
27486 case COMPARE:
27487 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
27488 && XEXP (XEXP (x, 0), 1) == const1_rtx
27489 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
27490 && XEXP (x, 1) == const0_rtx)
27491 {
27492 /* This kind of construct is implemented using test[bwl].
27493 Treat it as if we had an AND. */
27494 *total = (cost->add
27495 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
27496 + rtx_cost (const1_rtx, outer_code, speed));
27497 return true;
27498 }
27499 return false;
27500
27501 case FLOAT_EXTEND:
27502 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
27503 *total = 0;
27504 return false;
27505
27506 case ABS:
27507 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27508 /* ??? SSE cost should be used here. */
27509 *total = cost->fabs;
27510 else if (X87_FLOAT_MODE_P (mode))
27511 *total = cost->fabs;
27512 else if (FLOAT_MODE_P (mode))
27513 /* ??? SSE vector cost should be used here. */
27514 *total = cost->fabs;
27515 return false;
27516
27517 case SQRT:
27518 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27519 /* ??? SSE cost should be used here. */
27520 *total = cost->fsqrt;
27521 else if (X87_FLOAT_MODE_P (mode))
27522 *total = cost->fsqrt;
27523 else if (FLOAT_MODE_P (mode))
27524 /* ??? SSE vector cost should be used here. */
27525 *total = cost->fsqrt;
27526 return false;
27527
27528 case UNSPEC:
27529 if (XINT (x, 1) == UNSPEC_TP)
27530 *total = 0;
27531 return false;
27532
27533 case VEC_SELECT:
27534 case VEC_CONCAT:
27535 case VEC_MERGE:
27536 case VEC_DUPLICATE:
27537 /* ??? Assume all of these vector manipulation patterns are
27538 recognizable, in which case they all pretty much have the
27539 same cost. */
27540 *total = COSTS_N_INSNS (1);
27541 return true;
27542
27543 default:
27544 return false;
27545 }
27546 }
27547
27548 #if TARGET_MACHO
27549
27550 static int current_machopic_label_num;
27551
27552 /* Given a symbol name and its associated stub, write out the
27553 definition of the stub. */
27554
27555 void
27556 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27557 {
27558 unsigned int length;
27559 char *binder_name, *symbol_name, lazy_ptr_name[32];
27560 int label = ++current_machopic_label_num;
27561
27562 /* For 64-bit we shouldn't get here. */
27563 gcc_assert (!TARGET_64BIT);
27564
27565 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27566 symb = targetm.strip_name_encoding (symb);
27567
27568 length = strlen (stub);
27569 binder_name = XALLOCAVEC (char, length + 32);
27570 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27571
27572 length = strlen (symb);
27573 symbol_name = XALLOCAVEC (char, length + 32);
27574 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27575
27576 sprintf (lazy_ptr_name, "L%d$lz", label);
27577
27578 if (MACHOPIC_PURE)
27579 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27580 else
27581 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27582
27583 fprintf (file, "%s:\n", stub);
27584 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27585
27586 if (MACHOPIC_PURE)
27587 {
27588 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27589 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27590 fprintf (file, "\tjmp\t*%%edx\n");
27591 }
27592 else
27593 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27594
27595 fprintf (file, "%s:\n", binder_name);
27596
27597 if (MACHOPIC_PURE)
27598 {
27599 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27600 fputs ("\tpushl\t%eax\n", file);
27601 }
27602 else
27603 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27604
27605 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
27606
27607 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27608 fprintf (file, "%s:\n", lazy_ptr_name);
27609 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27610 fprintf (file, ASM_LONG "%s\n", binder_name);
27611 }
27612 #endif /* TARGET_MACHO */
27613
27614 /* Order the registers for the register allocator. */
27615
27616 void
27617 x86_order_regs_for_local_alloc (void)
27618 {
27619 int pos = 0;
27620 int i;
27621
27622 /* First allocate the local general purpose registers. */
27623 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27624 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27625 reg_alloc_order [pos++] = i;
27626
27627 /* Global general purpose registers. */
27628 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27629 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27630 reg_alloc_order [pos++] = i;
27631
27632 /* x87 registers come first in case we are doing FP math
27633 using them. */
27634 if (!TARGET_SSE_MATH)
27635 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27636 reg_alloc_order [pos++] = i;
27637
27638 /* SSE registers. */
27639 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27640 reg_alloc_order [pos++] = i;
27641 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27642 reg_alloc_order [pos++] = i;
27643
27644 /* x87 registers. */
27645 if (TARGET_SSE_MATH)
27646 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27647 reg_alloc_order [pos++] = i;
27648
27649 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27650 reg_alloc_order [pos++] = i;
27651
27652 /* Initialize the rest of the array, as we do not allocate some registers
27653 at all. */
27654 while (pos < FIRST_PSEUDO_REGISTER)
27655 reg_alloc_order [pos++] = 0;
27656 }
27657
27658 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
27659 struct attribute_spec.handler. */
27660 static tree
27661 ix86_handle_abi_attribute (tree *node, tree name,
27662 tree args ATTRIBUTE_UNUSED,
27663 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27664 {
27665 if (TREE_CODE (*node) != FUNCTION_TYPE
27666 && TREE_CODE (*node) != METHOD_TYPE
27667 && TREE_CODE (*node) != FIELD_DECL
27668 && TREE_CODE (*node) != TYPE_DECL)
27669 {
27670 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27671 name);
27672 *no_add_attrs = true;
27673 return NULL_TREE;
27674 }
27675 if (!TARGET_64BIT)
27676 {
27677 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27678 name);
27679 *no_add_attrs = true;
27680 return NULL_TREE;
27681 }
27682
27683 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
27684 if (is_attribute_p ("ms_abi", name))
27685 {
27686 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27687 {
27688 error ("ms_abi and sysv_abi attributes are not compatible");
27689 }
27690
27691 return NULL_TREE;
27692 }
27693 else if (is_attribute_p ("sysv_abi", name))
27694 {
27695 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27696 {
27697 error ("ms_abi and sysv_abi attributes are not compatible");
27698 }
27699
27700 return NULL_TREE;
27701 }
27702
27703 return NULL_TREE;
27704 }
27705
27706 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27707 struct attribute_spec.handler. */
27708 static tree
27709 ix86_handle_struct_attribute (tree *node, tree name,
27710 tree args ATTRIBUTE_UNUSED,
27711 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27712 {
27713 tree *type = NULL;
27714 if (DECL_P (*node))
27715 {
27716 if (TREE_CODE (*node) == TYPE_DECL)
27717 type = &TREE_TYPE (*node);
27718 }
27719 else
27720 type = node;
27721
27722 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27723 || TREE_CODE (*type) == UNION_TYPE)))
27724 {
27725 warning (OPT_Wattributes, "%qE attribute ignored",
27726 name);
27727 *no_add_attrs = true;
27728 }
27729
27730 else if ((is_attribute_p ("ms_struct", name)
27731 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27732 || ((is_attribute_p ("gcc_struct", name)
27733 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27734 {
27735 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27736 name);
27737 *no_add_attrs = true;
27738 }
27739
27740 return NULL_TREE;
27741 }
27742
27743 static tree
27744 ix86_handle_fndecl_attribute (tree *node, tree name,
27745 tree args ATTRIBUTE_UNUSED,
27746 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27747 {
27748 if (TREE_CODE (*node) != FUNCTION_DECL)
27749 {
27750 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27751 name);
27752 *no_add_attrs = true;
27753 }
27754 return NULL_TREE;
27755 }
27756
27757 static bool
27758 ix86_ms_bitfield_layout_p (const_tree record_type)
27759 {
27760 return ((TARGET_MS_BITFIELD_LAYOUT
27761 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27762 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27763 }
27764
27765 /* Returns an expression indicating where the this parameter is
27766 located on entry to the FUNCTION. */
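/* For example, for a 32-bit fastcall function that returns an aggregate in
   memory, the code below puts `this' in %edx, since %ecx carries the hidden
   return-slot pointer.  */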
27767
27768 static rtx
27769 x86_this_parameter (tree function)
27770 {
27771 tree type = TREE_TYPE (function);
27772 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27773 int nregs;
27774
27775 if (TARGET_64BIT)
27776 {
27777 const int *parm_regs;
27778
27779 if (ix86_function_type_abi (type) == MS_ABI)
27780 parm_regs = x86_64_ms_abi_int_parameter_registers;
27781 else
27782 parm_regs = x86_64_int_parameter_registers;
27783 return gen_rtx_REG (DImode, parm_regs[aggr]);
27784 }
27785
27786 nregs = ix86_function_regparm (type, function);
27787
27788 if (nregs > 0 && !stdarg_p (type))
27789 {
27790 int regno;
27791
27792 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27793 regno = aggr ? DX_REG : CX_REG;
27794 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27795 {
27796 regno = CX_REG;
27797 if (aggr)
27798 return gen_rtx_MEM (SImode,
27799 plus_constant (stack_pointer_rtx, 4));
27800 }
27801 else
27802 {
27803 regno = AX_REG;
27804 if (aggr)
27805 {
27806 regno = DX_REG;
27807 if (nregs == 1)
27808 return gen_rtx_MEM (SImode,
27809 plus_constant (stack_pointer_rtx, 4));
27810 }
27811 }
27812 return gen_rtx_REG (SImode, regno);
27813 }
27814
27815 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27816 }
27817
27818 /* Determine whether x86_output_mi_thunk can succeed. */
27819
27820 static bool
27821 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27822 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27823 HOST_WIDE_INT vcall_offset, const_tree function)
27824 {
27825 /* 64-bit can handle anything. */
27826 if (TARGET_64BIT)
27827 return true;
27828
27829 /* For 32-bit, everything's fine if we have one free register. */
27830 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27831 return true;
27832
27833 /* Need a free register for vcall_offset. */
27834 if (vcall_offset)
27835 return false;
27836
27837 /* Need a free register for GOT references. */
27838 if (flag_pic && !targetm.binds_local_p (function))
27839 return false;
27840
27841 /* Otherwise ok. */
27842 return true;
27843 }
27844
27845 /* Output the assembler code for a thunk function. THUNK_DECL is the
27846 declaration for the thunk function itself, FUNCTION is the decl for
27847 the target function. DELTA is an immediate constant offset to be
27848 added to THIS. If VCALL_OFFSET is nonzero, the word at
27849 *(*this + vcall_offset) should be added to THIS. */
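/* As an illustration (a sketch; exact syntax depends on the assembler
   dialect): on 32-bit, with DELTA == 8, VCALL_OFFSET == 0, a non-PIC
   target and `this' passed on the stack, the emitted thunk is essentially

	addl	$8, 4(%esp)
	jmp	function  */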
27850
27851 static void
27852 x86_output_mi_thunk (FILE *file,
27853 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27854 HOST_WIDE_INT vcall_offset, tree function)
27855 {
27856 rtx xops[3];
27857 rtx this_param = x86_this_parameter (function);
27858 rtx this_reg, tmp;
27859
27860 /* Make sure unwind info is emitted for the thunk if needed. */
27861 final_start_function (emit_barrier (), file, 1);
27862
27863 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27864 pull it in now and let DELTA benefit. */
27865 if (REG_P (this_param))
27866 this_reg = this_param;
27867 else if (vcall_offset)
27868 {
27869 /* Put the this parameter into %eax. */
27870 xops[0] = this_param;
27871 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27872 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27873 }
27874 else
27875 this_reg = NULL_RTX;
27876
27877 /* Adjust the this parameter by a fixed constant. */
27878 if (delta)
27879 {
27880 xops[0] = GEN_INT (delta);
27881 xops[1] = this_reg ? this_reg : this_param;
27882 if (TARGET_64BIT)
27883 {
27884 if (!x86_64_general_operand (xops[0], DImode))
27885 {
27886 tmp = gen_rtx_REG (DImode, R10_REG);
27887 xops[1] = tmp;
27888 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27889 xops[0] = tmp;
27890 xops[1] = this_param;
27891 }
27892 if (x86_maybe_negate_const_int (&xops[0], DImode))
27893 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27894 else
27895 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27896 }
27897 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27898 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27899 else
27900 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27901 }
27902
27903 /* Adjust the this parameter by a value stored in the vtable. */
27904 if (vcall_offset)
27905 {
27906 if (TARGET_64BIT)
27907 tmp = gen_rtx_REG (DImode, R10_REG);
27908 else
27909 {
27910 int tmp_regno = CX_REG;
27911 if (lookup_attribute ("fastcall",
27912 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27913 || lookup_attribute ("thiscall",
27914 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27915 tmp_regno = AX_REG;
27916 tmp = gen_rtx_REG (SImode, tmp_regno);
27917 }
27918
27919 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27920 xops[1] = tmp;
27921 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27922
27923 /* Adjust the this parameter. */
27924 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27925 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27926 {
27927 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27928 xops[0] = GEN_INT (vcall_offset);
27929 xops[1] = tmp2;
27930 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27931 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27932 }
27933 xops[1] = this_reg;
27934 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27935 }
27936
27937 /* If necessary, drop THIS back to its stack slot. */
27938 if (this_reg && this_reg != this_param)
27939 {
27940 xops[0] = this_reg;
27941 xops[1] = this_param;
27942 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27943 }
27944
27945 xops[0] = XEXP (DECL_RTL (function), 0);
27946 if (TARGET_64BIT)
27947 {
27948 if (!flag_pic || targetm.binds_local_p (function))
27949 output_asm_insn ("jmp\t%P0", xops);
27950 /* All thunks should be in the same object as their target,
27951 and thus binds_local_p should be true. */
27952 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27953 gcc_unreachable ();
27954 else
27955 {
27956 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27957 tmp = gen_rtx_CONST (Pmode, tmp);
27958 tmp = gen_rtx_MEM (QImode, tmp);
27959 xops[0] = tmp;
27960 output_asm_insn ("jmp\t%A0", xops);
27961 }
27962 }
27963 else
27964 {
27965 if (!flag_pic || targetm.binds_local_p (function))
27966 output_asm_insn ("jmp\t%P0", xops);
27967 else
27968 #if TARGET_MACHO
27969 if (TARGET_MACHO)
27970 {
27971 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27972 if (TARGET_MACHO_BRANCH_ISLANDS)
27973 sym_ref = (gen_rtx_SYMBOL_REF
27974 (Pmode,
27975 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27976 tmp = gen_rtx_MEM (QImode, sym_ref);
27977 xops[0] = tmp;
27978 output_asm_insn ("jmp\t%0", xops);
27979 }
27980 else
27981 #endif /* TARGET_MACHO */
27982 {
27983 tmp = gen_rtx_REG (SImode, CX_REG);
27984 output_set_got (tmp, NULL_RTX);
27985
27986 xops[1] = tmp;
27987 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27988 output_asm_insn ("jmp\t{*}%1", xops);
27989 }
27990 }
27991 final_end_function ();
27992 }
27993
27994 static void
27995 x86_file_start (void)
27996 {
27997 default_file_start ();
27998 #if TARGET_MACHO
27999 darwin_file_start ();
28000 #endif
28001 if (X86_FILE_START_VERSION_DIRECTIVE)
28002 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
28003 if (X86_FILE_START_FLTUSED)
28004 fputs ("\t.global\t__fltused\n", asm_out_file);
28005 if (ix86_asm_dialect == ASM_INTEL)
28006 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
28007 }
28008
28009 int
28010 x86_field_alignment (tree field, int computed)
28011 {
28012 enum machine_mode mode;
28013 tree type = TREE_TYPE (field);
28014
28015 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
28016 return computed;
28017 mode = TYPE_MODE (strip_array_types (type));
28018 if (mode == DFmode || mode == DCmode
28019 || GET_MODE_CLASS (mode) == MODE_INT
28020 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
28021 return MIN (32, computed);
28022 return computed;
28023 }
28024
28025 /* Output assembler code to FILE to increment profiler label # LABELNO
28026 for profiling a function entry. */
28027 void
28028 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
28029 {
28030 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
28031 : MCOUNT_NAME);
28032
28033 if (TARGET_64BIT)
28034 {
28035 #ifndef NO_PROFILE_COUNTERS
28036 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
28037 #endif
28038
28039 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
28040 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
28041 else
28042 fprintf (file, "\tcall\t%s\n", mcount_name);
28043 }
28044 else if (flag_pic)
28045 {
28046 #ifndef NO_PROFILE_COUNTERS
28047 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
28048 LPREFIX, labelno);
28049 #endif
28050 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
28051 }
28052 else
28053 {
28054 #ifndef NO_PROFILE_COUNTERS
28055 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
28056 LPREFIX, labelno);
28057 #endif
28058 fprintf (file, "\tcall\t%s\n", mcount_name);
28059 }
28060 }
28061
28062 /* We don't have exact information about the insn sizes, but we may assume
28063 quite safely that we are informed about all 1 byte insns and memory
28064 address sizes. This is enough to eliminate unnecessary padding in
28065 99% of cases. */
28066
28067 static int
28068 min_insn_size (rtx insn)
28069 {
28070 int l = 0, len;
28071
28072 if (!INSN_P (insn) || !active_insn_p (insn))
28073 return 0;
28074
28075 /* Discard the alignments we've emitted, and jump table data. */
28076 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
28077 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
28078 return 0;
28079 if (JUMP_TABLE_DATA_P (insn))
28080 return 0;
28081
28082 /* Important case - calls are always 5 bytes.
28083 It is common to have many calls in a row. */
28084 if (CALL_P (insn)
28085 && symbolic_reference_mentioned_p (PATTERN (insn))
28086 && !SIBLING_CALL_P (insn))
28087 return 5;
28088 len = get_attr_length (insn);
28089 if (len <= 1)
28090 return 1;
28091
28092 /* For normal instructions we rely on get_attr_length being exact,
28093 with a few exceptions. */
28094 if (!JUMP_P (insn))
28095 {
28096 enum attr_type type = get_attr_type (insn);
28097
28098 switch (type)
28099 {
28100 case TYPE_MULTI:
28101 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
28102 || asm_noperands (PATTERN (insn)) >= 0)
28103 return 0;
28104 break;
28105 case TYPE_OTHER:
28106 case TYPE_FCMP:
28107 break;
28108 default:
28109 /* Otherwise trust get_attr_length. */
28110 return len;
28111 }
28112
28113 l = get_attr_length_address (insn);
28114 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
28115 l = 4;
28116 }
28117 if (l)
28118 return 1+l;
28119 else
28120 return 2;
28121 }
28122
28123 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28124
28125 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
28126 16 byte window. */
28127
28128 static void
28129 ix86_avoid_jump_mispredicts (void)
28130 {
28131 rtx insn, start = get_insns ();
28132 int nbytes = 0, njumps = 0;
28133 int isjump = 0;
28134
28135 /* Look for all minimal intervals of instructions containing 4 jumps.
28136 The intervals are bounded by START and INSN. NBYTES is the total
28137 size of instructions in the interval including INSN and not including
28138 START. When NBYTES is smaller than 16 bytes, it is possible
28139 that the end of START and INSN end up in the same 16-byte page.
28140
28141 The smallest offset in the page at which INSN can start is the case where
28142 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
28143 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
28144 */
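/* For example, with NBYTES == 12 and a 2-byte INSN, a pad of up to
   15 - 12 + 2 = 5 bytes is requested before INSN, which keeps the four
   jumps from sharing a single 16-byte window.  */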
28145 for (insn = start; insn; insn = NEXT_INSN (insn))
28146 {
28147 int min_size;
28148
28149 if (LABEL_P (insn))
28150 {
28151 int align = label_to_alignment (insn);
28152 int max_skip = label_to_max_skip (insn);
28153
28154 if (max_skip > 15)
28155 max_skip = 15;
28156 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
28157 already in the current 16 byte page, because otherwise
28158 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
28159 bytes to reach 16 byte boundary. */
28160 if (align <= 0
28161 || (align <= 3 && max_skip != (1 << align) - 1))
28162 max_skip = 0;
28163 if (dump_file)
28164 fprintf (dump_file, "Label %i with max_skip %i\n",
28165 INSN_UID (insn), max_skip);
28166 if (max_skip)
28167 {
28168 while (nbytes + max_skip >= 16)
28169 {
28170 start = NEXT_INSN (start);
28171 if ((JUMP_P (start)
28172 && GET_CODE (PATTERN (start)) != ADDR_VEC
28173 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28174 || CALL_P (start))
28175 njumps--, isjump = 1;
28176 else
28177 isjump = 0;
28178 nbytes -= min_insn_size (start);
28179 }
28180 }
28181 continue;
28182 }
28183
28184 min_size = min_insn_size (insn);
28185 nbytes += min_size;
28186 if (dump_file)
28187 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
28188 INSN_UID (insn), min_size);
28189 if ((JUMP_P (insn)
28190 && GET_CODE (PATTERN (insn)) != ADDR_VEC
28191 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
28192 || CALL_P (insn))
28193 njumps++;
28194 else
28195 continue;
28196
28197 while (njumps > 3)
28198 {
28199 start = NEXT_INSN (start);
28200 if ((JUMP_P (start)
28201 && GET_CODE (PATTERN (start)) != ADDR_VEC
28202 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28203 || CALL_P (start))
28204 njumps--, isjump = 1;
28205 else
28206 isjump = 0;
28207 nbytes -= min_insn_size (start);
28208 }
28209 gcc_assert (njumps >= 0);
28210 if (dump_file)
28211 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
28212 INSN_UID (start), INSN_UID (insn), nbytes);
28213
28214 if (njumps == 3 && isjump && nbytes < 16)
28215 {
28216 int padsize = 15 - nbytes + min_insn_size (insn);
28217
28218 if (dump_file)
28219 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
28220 INSN_UID (insn), padsize);
28221 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
28222 }
28223 }
28224 }
28225 #endif
28226
28227 /* The AMD Athlon works faster
28228 when RET is not the destination of a conditional jump or directly preceded
28229 by another jump instruction. We avoid the penalty by inserting a NOP just
28230 before the RET instructions in such cases. */
28231 static void
28232 ix86_pad_returns (void)
28233 {
28234 edge e;
28235 edge_iterator ei;
28236
28237 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28238 {
28239 basic_block bb = e->src;
28240 rtx ret = BB_END (bb);
28241 rtx prev;
28242 bool replace = false;
28243
28244 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
28245 || optimize_bb_for_size_p (bb))
28246 continue;
28247 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
28248 if (active_insn_p (prev) || LABEL_P (prev))
28249 break;
28250 if (prev && LABEL_P (prev))
28251 {
28252 edge e;
28253 edge_iterator ei;
28254
28255 FOR_EACH_EDGE (e, ei, bb->preds)
28256 if (EDGE_FREQUENCY (e) && e->src->index >= 0
28257 && !(e->flags & EDGE_FALLTHRU))
28258 replace = true;
28259 }
28260 if (!replace)
28261 {
28262 prev = prev_active_insn (ret);
28263 if (prev
28264 && ((JUMP_P (prev) && any_condjump_p (prev))
28265 || CALL_P (prev)))
28266 replace = true;
28267 /* Empty functions get a branch mispredict even when the jump destination
28268 is not visible to us. */
28269 if (!prev && !optimize_function_for_size_p (cfun))
28270 replace = true;
28271 }
28272 if (replace)
28273 {
28274 emit_jump_insn_before (gen_return_internal_long (), ret);
28275 delete_insn (ret);
28276 }
28277 }
28278 }
28279
28280 /* Count the minimum number of instructions in BB. Return 4 if the
28281 number of instructions >= 4. */
28282
28283 static int
28284 ix86_count_insn_bb (basic_block bb)
28285 {
28286 rtx insn;
28287 int insn_count = 0;
28288
28289 /* Count number of instructions in this block. Return 4 if the number
28290 of instructions >= 4. */
28291 FOR_BB_INSNS (bb, insn)
28292 {
28293 /* This only happens in exit blocks. */
28294 if (JUMP_P (insn)
28295 && GET_CODE (PATTERN (insn)) == RETURN)
28296 break;
28297
28298 if (NONDEBUG_INSN_P (insn)
28299 && GET_CODE (PATTERN (insn)) != USE
28300 && GET_CODE (PATTERN (insn)) != CLOBBER)
28301 {
28302 insn_count++;
28303 if (insn_count >= 4)
28304 return insn_count;
28305 }
28306 }
28307
28308 return insn_count;
28309 }
28310
28311
28312 /* Count the minimum number of instructions in a code path ending in BB.
28313 Return 4 if the number of instructions >= 4. */
28314
28315 static int
28316 ix86_count_insn (basic_block bb)
28317 {
28318 edge e;
28319 edge_iterator ei;
28320 int min_prev_count;
28321
28322 /* Only bother counting instructions along paths with no
28323 more than 2 basic blocks between entry and exit. Given
28324 that BB has an edge to exit, determine if a predecessor
28325 of BB has an edge from entry. If so, compute the number
28326 of instructions in the predecessor block. If there
28327 happen to be multiple such blocks, compute the minimum. */
28328 min_prev_count = 4;
28329 FOR_EACH_EDGE (e, ei, bb->preds)
28330 {
28331 edge prev_e;
28332 edge_iterator prev_ei;
28333
28334 if (e->src == ENTRY_BLOCK_PTR)
28335 {
28336 min_prev_count = 0;
28337 break;
28338 }
28339 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
28340 {
28341 if (prev_e->src == ENTRY_BLOCK_PTR)
28342 {
28343 int count = ix86_count_insn_bb (e->src);
28344 if (count < min_prev_count)
28345 min_prev_count = count;
28346 break;
28347 }
28348 }
28349 }
28350
28351 if (min_prev_count < 4)
28352 min_prev_count += ix86_count_insn_bb (bb);
28353
28354 return min_prev_count;
28355 }
28356
28357 /* Pad short functions to 4 instructions. */
28358
28359 static void
28360 ix86_pad_short_function (void)
28361 {
28362 edge e;
28363 edge_iterator ei;
28364
28365 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28366 {
28367 rtx ret = BB_END (e->src);
28368 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
28369 {
28370 int insn_count = ix86_count_insn (e->src);
28371
28372 /* Pad short function. */
28373 if (insn_count < 4)
28374 {
28375 rtx insn = ret;
28376
28377 /* Find epilogue. */
28378 while (insn
28379 && (!NOTE_P (insn)
28380 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
28381 insn = PREV_INSN (insn);
28382
28383 if (!insn)
28384 insn = ret;
28385
28386 /* Two NOPs are counted as one instruction. */
28387 insn_count = 2 * (4 - insn_count);
28388 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
28389 }
28390 }
28391 }
28392 }
28393
28394 /* Implement machine specific optimizations.  We implement padding of returns
28395    for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
28396 static void
28397 ix86_reorg (void)
28398 {
28399 if (optimize && optimize_function_for_speed_p (cfun))
28400 {
28401 if (TARGET_PAD_SHORT_FUNCTION)
28402 ix86_pad_short_function ();
28403 else if (TARGET_PAD_RETURNS)
28404 ix86_pad_returns ();
28405 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28406 if (TARGET_FOUR_JUMP_LIMIT)
28407 ix86_avoid_jump_mispredicts ();
28408 #endif
28409 }
28410 }
28411
28412 /* Return nonzero when a QImode register that must be represented via a REX
28413    prefix is used. */
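/* In 64-bit code only %al, %cl, %dl and %bl can be addressed as byte
   registers without a REX prefix; any other integer register used in QImode
   (SIL/DIL/BPL/SPL or R8B-R15B) needs one, which is what the REGNO > BX_REG
   test below detects.  */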
28414 bool
28415 x86_extended_QIreg_mentioned_p (rtx insn)
28416 {
28417 int i;
28418 extract_insn_cached (insn);
28419 for (i = 0; i < recog_data.n_operands; i++)
28420 if (REG_P (recog_data.operand[i])
28421 && REGNO (recog_data.operand[i]) > BX_REG)
28422 return true;
28423 return false;
28424 }
28425
28426 /* Return nonzero when P points to a register encoded via a REX prefix.
28427    Called via for_each_rtx. */
28428 static int
28429 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
28430 {
28431 unsigned int regno;
28432 if (!REG_P (*p))
28433 return 0;
28434 regno = REGNO (*p);
28435 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
28436 }
28437
28438 /* Return true when INSN mentions register that must be encoded using REX
28439 prefix. */
28440 bool
28441 x86_extended_reg_mentioned_p (rtx insn)
28442 {
28443 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
28444 extended_reg_mentioned_1, NULL);
28445 }
28446
28447 /* If profitable, negate (without causing overflow) integer constant
28448 of mode MODE at location LOC. Return true in this case. */
28449 bool
28450 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
28451 {
28452 HOST_WIDE_INT val;
28453
28454 if (!CONST_INT_P (*loc))
28455 return false;
28456
28457 switch (mode)
28458 {
28459 case DImode:
28460 /* DImode x86_64 constants must fit in 32 bits. */
28461 gcc_assert (x86_64_immediate_operand (*loc, mode));
28462
28463 mode = SImode;
28464 break;
28465
28466 case SImode:
28467 case HImode:
28468 case QImode:
28469 break;
28470
28471 default:
28472 gcc_unreachable ();
28473 }
28474
28475 /* Avoid overflows. */
28476 if (mode_signbit_p (mode, *loc))
28477 return false;
28478
28479 val = INTVAL (*loc);
28480
28481   /* Make things pretty: use `subl $4,%eax' rather than `addl $-4,%eax'.
28482      Exception: -128 encodes smaller than 128, so negate 128 but not -128. */
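  /* For example, `addl $-4, %eax' becomes `subl $4, %eax'.  -128 is kept
     as-is because negating it to 128 would no longer fit in a sign-extended
     8-bit immediate; for the same reason 128 is negated to -128.  */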
28483 if ((val < 0 && val != -128)
28484 || val == 128)
28485 {
28486 *loc = GEN_INT (-val);
28487 return true;
28488 }
28489
28490 return false;
28491 }
28492
28493 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
28494 optabs would emit if we didn't have TFmode patterns. */
28495
28496 void
28497 x86_emit_floatuns (rtx operands[2])
28498 {
28499 rtx neglab, donelab, i0, i1, f0, in, out;
28500 enum machine_mode mode, inmode;
28501
28502 inmode = GET_MODE (operands[1]);
28503 gcc_assert (inmode == SImode || inmode == DImode);
28504
28505 out = operands[0];
28506 in = force_reg (inmode, operands[1]);
28507 mode = GET_MODE (out);
28508 neglab = gen_label_rtx ();
28509 donelab = gen_label_rtx ();
28510 f0 = gen_reg_rtx (mode);
28511
28512 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
28513
28514 expand_float (out, in, 0);
28515
28516 emit_jump_insn (gen_jump (donelab));
28517 emit_barrier ();
28518
28519 emit_label (neglab);
28520
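  /* IN has its high bit set, so it cannot be converted directly as a signed
     value.  Instead convert IN/2 and double the result; the low bit of IN is
     ORed back in ((in >> 1) | (in & 1)) so it acts as a sticky bit and the
     final rounding matches a direct conversion.  */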
28521 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
28522 1, OPTAB_DIRECT);
28523 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
28524 1, OPTAB_DIRECT);
28525 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
28526
28527 expand_float (f0, i0, 0);
28528
28529 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
28530
28531 emit_label (donelab);
28532 }
28533 \f
28534 /* AVX does not support 32-byte integer vector operations,
28535 thus the longest vector we are faced with is V16QImode. */
28536 #define MAX_VECT_LEN 16
28537
28538 struct expand_vec_perm_d
28539 {
28540 rtx target, op0, op1;
28541 unsigned char perm[MAX_VECT_LEN];
28542 enum machine_mode vmode;
28543 unsigned char nelt;
28544 bool testing_p;
28545 };
28546
28547 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
28548 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
28549
28550 /* Get a vector mode of the same size as the original but with elements
28551 twice as wide. This is only guaranteed to apply to integral vectors. */
28552
28553 static inline enum machine_mode
28554 get_mode_wider_vector (enum machine_mode o)
28555 {
28556 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
28557 enum machine_mode n = GET_MODE_WIDER_MODE (o);
28558 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
28559 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
28560 return n;
28561 }
28562
28563 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28564 with all elements equal to VAR. Return true if successful. */
28565
28566 static bool
28567 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
28568 rtx target, rtx val)
28569 {
28570 bool ok;
28571
28572 switch (mode)
28573 {
28574 case V2SImode:
28575 case V2SFmode:
28576 if (!mmx_ok)
28577 return false;
28578 /* FALLTHRU */
28579
28580 case V4DFmode:
28581 case V4DImode:
28582 case V8SFmode:
28583 case V8SImode:
28584 case V2DFmode:
28585 case V2DImode:
28586 case V4SFmode:
28587 case V4SImode:
28588 {
28589 rtx insn, dup;
28590
28591 /* First attempt to recognize VAL as-is. */
28592 dup = gen_rtx_VEC_DUPLICATE (mode, val);
28593 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
28594 if (recog_memoized (insn) < 0)
28595 {
28596 rtx seq;
28597 /* If that fails, force VAL into a register. */
28598
28599 start_sequence ();
28600 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
28601 seq = get_insns ();
28602 end_sequence ();
28603 if (seq)
28604 emit_insn_before (seq, insn);
28605
28606 ok = recog_memoized (insn) >= 0;
28607 gcc_assert (ok);
28608 }
28609 }
28610 return true;
28611
28612 case V4HImode:
28613 if (!mmx_ok)
28614 return false;
28615 if (TARGET_SSE || TARGET_3DNOW_A)
28616 {
28617 rtx x;
28618
28619 val = gen_lowpart (SImode, val);
28620 x = gen_rtx_TRUNCATE (HImode, val);
28621 x = gen_rtx_VEC_DUPLICATE (mode, x);
28622 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28623 return true;
28624 }
28625 goto widen;
28626
28627 case V8QImode:
28628 if (!mmx_ok)
28629 return false;
28630 goto widen;
28631
28632 case V8HImode:
28633 if (TARGET_SSE2)
28634 {
28635 struct expand_vec_perm_d dperm;
28636 rtx tmp1, tmp2;
28637
28638 permute:
28639 memset (&dperm, 0, sizeof (dperm));
28640 dperm.target = target;
28641 dperm.vmode = mode;
28642 dperm.nelt = GET_MODE_NUNITS (mode);
28643 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
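	  /* dperm.perm[] is left all zero by the memset above, so every
	     output element selects element 0 of op0 -- i.e. the permutation
	     is a broadcast of the low element.  */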
28644
28645 /* Extend to SImode using a paradoxical SUBREG. */
28646 tmp1 = gen_reg_rtx (SImode);
28647 emit_move_insn (tmp1, gen_lowpart (SImode, val));
28648
28649 /* Insert the SImode value as low element of a V4SImode vector. */
28650 tmp2 = gen_lowpart (V4SImode, dperm.op0);
28651 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
28652
28653 ok = (expand_vec_perm_1 (&dperm)
28654 || expand_vec_perm_broadcast_1 (&dperm));
28655 gcc_assert (ok);
28656 return ok;
28657 }
28658 goto widen;
28659
28660 case V16QImode:
28661 if (TARGET_SSE2)
28662 goto permute;
28663 goto widen;
28664
28665 widen:
28666 /* Replicate the value once into the next wider mode and recurse. */
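      /* For example, a V8QImode byte B becomes the HImode value
	 (B << 8) | B, which is then broadcast as a V4HImode vector occupying
	 the same register bytes.  */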
28667 {
28668 enum machine_mode smode, wsmode, wvmode;
28669 rtx x;
28670
28671 smode = GET_MODE_INNER (mode);
28672 wvmode = get_mode_wider_vector (mode);
28673 wsmode = GET_MODE_INNER (wvmode);
28674
28675 val = convert_modes (wsmode, smode, val, true);
28676 x = expand_simple_binop (wsmode, ASHIFT, val,
28677 GEN_INT (GET_MODE_BITSIZE (smode)),
28678 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28679 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
28680
28681 x = gen_lowpart (wvmode, target);
28682 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
28683 gcc_assert (ok);
28684 return ok;
28685 }
28686
28687 case V16HImode:
28688 case V32QImode:
28689 {
28690 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
28691 rtx x = gen_reg_rtx (hvmode);
28692
28693 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
28694 gcc_assert (ok);
28695
28696 x = gen_rtx_VEC_CONCAT (mode, x, x);
28697 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28698 }
28699 return true;
28700
28701 default:
28702 return false;
28703 }
28704 }
28705
28706 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28707 whose ONE_VAR element is VAR, and other elements are zero. Return true
28708 if successful. */
28709
28710 static bool
28711 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28712 rtx target, rtx var, int one_var)
28713 {
28714 enum machine_mode vsimode;
28715 rtx new_target;
28716 rtx x, tmp;
28717 bool use_vector_set = false;
28718
28719 switch (mode)
28720 {
28721 case V2DImode:
28722 /* For SSE4.1, we normally use vector set. But if the second
28723 element is zero and inter-unit moves are OK, we use movq
28724 instead. */
28725 use_vector_set = (TARGET_64BIT
28726 && TARGET_SSE4_1
28727 && !(TARGET_INTER_UNIT_MOVES
28728 && one_var == 0));
28729 break;
28730 case V16QImode:
28731 case V4SImode:
28732 case V4SFmode:
28733 use_vector_set = TARGET_SSE4_1;
28734 break;
28735 case V8HImode:
28736 use_vector_set = TARGET_SSE2;
28737 break;
28738 case V4HImode:
28739 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28740 break;
28741 case V32QImode:
28742 case V16HImode:
28743 case V8SImode:
28744 case V8SFmode:
28745 case V4DFmode:
28746 use_vector_set = TARGET_AVX;
28747 break;
28748 case V4DImode:
28749 /* Use ix86_expand_vector_set in 64bit mode only. */
28750 use_vector_set = TARGET_AVX && TARGET_64BIT;
28751 break;
28752 default:
28753 break;
28754 }
28755
28756 if (use_vector_set)
28757 {
28758 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28759 var = force_reg (GET_MODE_INNER (mode), var);
28760 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28761 return true;
28762 }
28763
28764 switch (mode)
28765 {
28766 case V2SFmode:
28767 case V2SImode:
28768 if (!mmx_ok)
28769 return false;
28770 /* FALLTHRU */
28771
28772 case V2DFmode:
28773 case V2DImode:
28774 if (one_var != 0)
28775 return false;
28776 var = force_reg (GET_MODE_INNER (mode), var);
28777 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28778 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28779 return true;
28780
28781 case V4SFmode:
28782 case V4SImode:
28783 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28784 new_target = gen_reg_rtx (mode);
28785 else
28786 new_target = target;
28787 var = force_reg (GET_MODE_INNER (mode), var);
28788 x = gen_rtx_VEC_DUPLICATE (mode, var);
28789 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28790 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28791 if (one_var != 0)
28792 {
28793 /* We need to shuffle the value to the correct position, so
28794 create a new pseudo to store the intermediate result. */
28795
28796 /* With SSE2, we can use the integer shuffle insns. */
28797 if (mode != V4SFmode && TARGET_SSE2)
28798 {
28799 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28800 const1_rtx,
28801 GEN_INT (one_var == 1 ? 0 : 1),
28802 GEN_INT (one_var == 2 ? 0 : 1),
28803 GEN_INT (one_var == 3 ? 0 : 1)));
28804 if (target != new_target)
28805 emit_move_insn (target, new_target);
28806 return true;
28807 }
28808
28809 /* Otherwise convert the intermediate result to V4SFmode and
28810 use the SSE1 shuffle instructions. */
28811 if (mode != V4SFmode)
28812 {
28813 tmp = gen_reg_rtx (V4SFmode);
28814 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28815 }
28816 else
28817 tmp = new_target;
28818
28819 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28820 const1_rtx,
28821 GEN_INT (one_var == 1 ? 0 : 1),
28822 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28823 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28824
28825 if (mode != V4SFmode)
28826 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28827 else if (tmp != target)
28828 emit_move_insn (target, tmp);
28829 }
28830 else if (target != new_target)
28831 emit_move_insn (target, new_target);
28832 return true;
28833
28834 case V8HImode:
28835 case V16QImode:
28836 vsimode = V4SImode;
28837 goto widen;
28838 case V4HImode:
28839 case V8QImode:
28840 if (!mmx_ok)
28841 return false;
28842 vsimode = V2SImode;
28843 goto widen;
28844 widen:
28845 if (one_var != 0)
28846 return false;
28847
28848 /* Zero extend the variable element to SImode and recurse. */
28849 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28850
28851 x = gen_reg_rtx (vsimode);
28852 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28853 var, one_var))
28854 gcc_unreachable ();
28855
28856 emit_move_insn (target, gen_lowpart (mode, x));
28857 return true;
28858
28859 default:
28860 return false;
28861 }
28862 }
28863
28864 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28865 consisting of the values in VALS. It is known that all elements
28866 except ONE_VAR are constants. Return true if successful. */
28867
28868 static bool
28869 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28870 rtx target, rtx vals, int one_var)
28871 {
28872 rtx var = XVECEXP (vals, 0, one_var);
28873 enum machine_mode wmode;
28874 rtx const_vec, x;
28875
28876 const_vec = copy_rtx (vals);
28877 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28878 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28879
28880 switch (mode)
28881 {
28882 case V2DFmode:
28883 case V2DImode:
28884 case V2SFmode:
28885 case V2SImode:
28886 /* For the two element vectors, it's just as easy to use
28887 the general case. */
28888 return false;
28889
28890 case V4DImode:
28891 /* Use ix86_expand_vector_set in 64bit mode only. */
28892 if (!TARGET_64BIT)
28893 return false;
28894 case V4DFmode:
28895 case V8SFmode:
28896 case V8SImode:
28897 case V16HImode:
28898 case V32QImode:
28899 case V4SFmode:
28900 case V4SImode:
28901 case V8HImode:
28902 case V4HImode:
28903 break;
28904
28905 case V16QImode:
28906 if (TARGET_SSE4_1)
28907 break;
28908 wmode = V8HImode;
28909 goto widen;
28910 case V8QImode:
28911 wmode = V4HImode;
28912 goto widen;
28913 widen:
28914 /* There's no way to set one QImode entry easily. Combine
28915 the variable value with its adjacent constant value, and
28916 promote to an HImode set. */
28917 x = XVECEXP (vals, 0, one_var ^ 1);
28918 if (one_var & 1)
28919 {
28920 var = convert_modes (HImode, QImode, var, true);
28921 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28922 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28923 x = GEN_INT (INTVAL (x) & 0xff);
28924 }
28925 else
28926 {
28927 var = convert_modes (HImode, QImode, var, true);
28928 x = gen_int_mode (INTVAL (x) << 8, HImode);
28929 }
28930 if (x != const0_rtx)
28931 var = expand_simple_binop (HImode, IOR, var, x, var,
28932 1, OPTAB_LIB_WIDEN);
28933
28934 x = gen_reg_rtx (wmode);
28935 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28936 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28937
28938 emit_move_insn (target, gen_lowpart (mode, x));
28939 return true;
28940
28941 default:
28942 return false;
28943 }
28944
28945 emit_move_insn (target, const_vec);
28946 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28947 return true;
28948 }
28949
28950 /* A subroutine of ix86_expand_vector_init_general. Use vector
28951 concatenate to handle the most general case: all values variable,
28952 and none identical. */
28953
28954 static void
28955 ix86_expand_vector_init_concat (enum machine_mode mode,
28956 rtx target, rtx *ops, int n)
28957 {
28958 enum machine_mode cmode, hmode = VOIDmode;
28959 rtx first[8], second[4];
28960 rtvec v;
28961 int i, j;
28962
28963 switch (n)
28964 {
28965 case 2:
28966 switch (mode)
28967 {
28968 case V8SImode:
28969 cmode = V4SImode;
28970 break;
28971 case V8SFmode:
28972 cmode = V4SFmode;
28973 break;
28974 case V4DImode:
28975 cmode = V2DImode;
28976 break;
28977 case V4DFmode:
28978 cmode = V2DFmode;
28979 break;
28980 case V4SImode:
28981 cmode = V2SImode;
28982 break;
28983 case V4SFmode:
28984 cmode = V2SFmode;
28985 break;
28986 case V2DImode:
28987 cmode = DImode;
28988 break;
28989 case V2SImode:
28990 cmode = SImode;
28991 break;
28992 case V2DFmode:
28993 cmode = DFmode;
28994 break;
28995 case V2SFmode:
28996 cmode = SFmode;
28997 break;
28998 default:
28999 gcc_unreachable ();
29000 }
29001
29002 if (!register_operand (ops[1], cmode))
29003 ops[1] = force_reg (cmode, ops[1]);
29004 if (!register_operand (ops[0], cmode))
29005 ops[0] = force_reg (cmode, ops[0]);
29006 emit_insn (gen_rtx_SET (VOIDmode, target,
29007 gen_rtx_VEC_CONCAT (mode, ops[0],
29008 ops[1])));
29009 break;
29010
29011 case 4:
29012 switch (mode)
29013 {
29014 case V4DImode:
29015 cmode = V2DImode;
29016 break;
29017 case V4DFmode:
29018 cmode = V2DFmode;
29019 break;
29020 case V4SImode:
29021 cmode = V2SImode;
29022 break;
29023 case V4SFmode:
29024 cmode = V2SFmode;
29025 break;
29026 default:
29027 gcc_unreachable ();
29028 }
29029 goto half;
29030
29031 case 8:
29032 switch (mode)
29033 {
29034 case V8SImode:
29035 cmode = V2SImode;
29036 hmode = V4SImode;
29037 break;
29038 case V8SFmode:
29039 cmode = V2SFmode;
29040 hmode = V4SFmode;
29041 break;
29042 default:
29043 gcc_unreachable ();
29044 }
29045 goto half;
29046
29047 half:
29048 /* FIXME: We process inputs backward to help RA. PR 36222. */
29049 i = n - 1;
29050 j = (n >> 1) - 1;
29051 for (; i > 0; i -= 2, j--)
29052 {
29053 first[j] = gen_reg_rtx (cmode);
29054 v = gen_rtvec (2, ops[i - 1], ops[i]);
29055 ix86_expand_vector_init (false, first[j],
29056 gen_rtx_PARALLEL (cmode, v));
29057 }
29058
29059 n >>= 1;
29060 if (n > 2)
29061 {
29062 gcc_assert (hmode != VOIDmode);
29063 for (i = j = 0; i < n; i += 2, j++)
29064 {
29065 second[j] = gen_reg_rtx (hmode);
29066 ix86_expand_vector_init_concat (hmode, second [j],
29067 &first [i], 2);
29068 }
29069 n >>= 1;
29070 ix86_expand_vector_init_concat (mode, target, second, n);
29071 }
29072 else
29073 ix86_expand_vector_init_concat (mode, target, first, n);
29074 break;
29075
29076 default:
29077 gcc_unreachable ();
29078 }
29079 }
29080
29081 /* A subroutine of ix86_expand_vector_init_general. Use vector
29082 interleave to handle the most general case: all values variable,
29083 and none identical. */
29084
29085 static void
29086 ix86_expand_vector_init_interleave (enum machine_mode mode,
29087 rtx target, rtx *ops, int n)
29088 {
29089 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
29090 int i, j;
29091 rtx op0, op1;
29092 rtx (*gen_load_even) (rtx, rtx, rtx);
29093 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
29094 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
29095
29096 switch (mode)
29097 {
29098 case V8HImode:
29099 gen_load_even = gen_vec_setv8hi;
29100 gen_interleave_first_low = gen_vec_interleave_lowv4si;
29101 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29102 inner_mode = HImode;
29103 first_imode = V4SImode;
29104 second_imode = V2DImode;
29105 third_imode = VOIDmode;
29106 break;
29107 case V16QImode:
29108 gen_load_even = gen_vec_setv16qi;
29109 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
29110 gen_interleave_second_low = gen_vec_interleave_lowv4si;
29111 inner_mode = QImode;
29112 first_imode = V8HImode;
29113 second_imode = V4SImode;
29114 third_imode = V2DImode;
29115 break;
29116 default:
29117 gcc_unreachable ();
29118 }
29119
29120 for (i = 0; i < n; i++)
29121 {
29122       /* Extend the odd element to SImode using a paradoxical SUBREG. */
29123 op0 = gen_reg_rtx (SImode);
29124 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
29125
29126 /* Insert the SImode value as low element of V4SImode vector. */
29127 op1 = gen_reg_rtx (V4SImode);
29128 op0 = gen_rtx_VEC_MERGE (V4SImode,
29129 gen_rtx_VEC_DUPLICATE (V4SImode,
29130 op0),
29131 CONST0_RTX (V4SImode),
29132 const1_rtx);
29133 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
29134
29135       /* Cast the V4SImode vector back to a vector in the original mode. */
29136 op0 = gen_reg_rtx (mode);
29137 emit_move_insn (op0, gen_lowpart (mode, op1));
29138
29139       /* Load even elements into the second position. */
29140 emit_insn (gen_load_even (op0,
29141 force_reg (inner_mode,
29142 ops [i + i + 1]),
29143 const1_rtx));
29144
29145 /* Cast vector to FIRST_IMODE vector. */
29146 ops[i] = gen_reg_rtx (first_imode);
29147 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
29148 }
29149
29150 /* Interleave low FIRST_IMODE vectors. */
29151 for (i = j = 0; i < n; i += 2, j++)
29152 {
29153 op0 = gen_reg_rtx (first_imode);
29154 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
29155
29156 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
29157 ops[j] = gen_reg_rtx (second_imode);
29158 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
29159 }
29160
29161 /* Interleave low SECOND_IMODE vectors. */
29162 switch (second_imode)
29163 {
29164 case V4SImode:
29165 for (i = j = 0; i < n / 2; i += 2, j++)
29166 {
29167 op0 = gen_reg_rtx (second_imode);
29168 emit_insn (gen_interleave_second_low (op0, ops[i],
29169 ops[i + 1]));
29170
29171 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
29172 vector. */
29173 ops[j] = gen_reg_rtx (third_imode);
29174 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
29175 }
29176 second_imode = V2DImode;
29177 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29178 /* FALLTHRU */
29179
29180 case V2DImode:
29181 op0 = gen_reg_rtx (second_imode);
29182 emit_insn (gen_interleave_second_low (op0, ops[0],
29183 ops[1]));
29184
29185       /* Cast the SECOND_IMODE vector back to a vector in the original
29186 	 mode. */
29187 emit_insn (gen_rtx_SET (VOIDmode, target,
29188 gen_lowpart (mode, op0)));
29189 break;
29190
29191 default:
29192 gcc_unreachable ();
29193 }
29194 }
29195
29196 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
29197 all values variable, and none identical. */
29198
29199 static void
29200 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
29201 rtx target, rtx vals)
29202 {
29203 rtx ops[32], op0, op1;
29204 enum machine_mode half_mode = VOIDmode;
29205 int n, i;
29206
29207 switch (mode)
29208 {
29209 case V2SFmode:
29210 case V2SImode:
29211 if (!mmx_ok && !TARGET_SSE)
29212 break;
29213 /* FALLTHRU */
29214
29215 case V8SFmode:
29216 case V8SImode:
29217 case V4DFmode:
29218 case V4DImode:
29219 case V4SFmode:
29220 case V4SImode:
29221 case V2DFmode:
29222 case V2DImode:
29223 n = GET_MODE_NUNITS (mode);
29224 for (i = 0; i < n; i++)
29225 ops[i] = XVECEXP (vals, 0, i);
29226 ix86_expand_vector_init_concat (mode, target, ops, n);
29227 return;
29228
29229 case V32QImode:
29230 half_mode = V16QImode;
29231 goto half;
29232
29233 case V16HImode:
29234 half_mode = V8HImode;
29235 goto half;
29236
29237 half:
29238 n = GET_MODE_NUNITS (mode);
29239 for (i = 0; i < n; i++)
29240 ops[i] = XVECEXP (vals, 0, i);
29241 op0 = gen_reg_rtx (half_mode);
29242 op1 = gen_reg_rtx (half_mode);
29243 ix86_expand_vector_init_interleave (half_mode, op0, ops,
29244 n >> 2);
29245 ix86_expand_vector_init_interleave (half_mode, op1,
29246 &ops [n >> 1], n >> 2);
29247 emit_insn (gen_rtx_SET (VOIDmode, target,
29248 gen_rtx_VEC_CONCAT (mode, op0, op1)));
29249 return;
29250
29251 case V16QImode:
29252 if (!TARGET_SSE4_1)
29253 break;
29254 /* FALLTHRU */
29255
29256 case V8HImode:
29257 if (!TARGET_SSE2)
29258 break;
29259
29260 /* Don't use ix86_expand_vector_init_interleave if we can't
29261 move from GPR to SSE register directly. */
29262 if (!TARGET_INTER_UNIT_MOVES)
29263 break;
29264
29265 n = GET_MODE_NUNITS (mode);
29266 for (i = 0; i < n; i++)
29267 ops[i] = XVECEXP (vals, 0, i);
29268 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
29269 return;
29270
29271 case V4HImode:
29272 case V8QImode:
29273 break;
29274
29275 default:
29276 gcc_unreachable ();
29277 }
29278
29279 {
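    /* Generic fallback: pack the vector elements into word_mode integers
       with shifts and IORs, then assemble the vector from those words.  */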
29280 int i, j, n_elts, n_words, n_elt_per_word;
29281 enum machine_mode inner_mode;
29282 rtx words[4], shift;
29283
29284 inner_mode = GET_MODE_INNER (mode);
29285 n_elts = GET_MODE_NUNITS (mode);
29286 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
29287 n_elt_per_word = n_elts / n_words;
29288 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
29289
29290 for (i = 0; i < n_words; ++i)
29291 {
29292 rtx word = NULL_RTX;
29293
29294 for (j = 0; j < n_elt_per_word; ++j)
29295 {
29296 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
29297 elt = convert_modes (word_mode, inner_mode, elt, true);
29298
29299 if (j == 0)
29300 word = elt;
29301 else
29302 {
29303 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
29304 word, 1, OPTAB_LIB_WIDEN);
29305 word = expand_simple_binop (word_mode, IOR, word, elt,
29306 word, 1, OPTAB_LIB_WIDEN);
29307 }
29308 }
29309
29310 words[i] = word;
29311 }
29312
29313 if (n_words == 1)
29314 emit_move_insn (target, gen_lowpart (mode, words[0]));
29315 else if (n_words == 2)
29316 {
29317 rtx tmp = gen_reg_rtx (mode);
29318 emit_clobber (tmp);
29319 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
29320 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
29321 emit_move_insn (target, tmp);
29322 }
29323 else if (n_words == 4)
29324 {
29325 rtx tmp = gen_reg_rtx (V4SImode);
29326 gcc_assert (word_mode == SImode);
29327 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
29328 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
29329 emit_move_insn (target, gen_lowpart (mode, tmp));
29330 }
29331 else
29332 gcc_unreachable ();
29333 }
29334 }
29335
29336 /* Initialize vector TARGET via VALS. Suppress the use of MMX
29337 instructions unless MMX_OK is true. */
29338
29339 void
29340 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
29341 {
29342 enum machine_mode mode = GET_MODE (target);
29343 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29344 int n_elts = GET_MODE_NUNITS (mode);
29345 int n_var = 0, one_var = -1;
29346 bool all_same = true, all_const_zero = true;
29347 int i;
29348 rtx x;
29349
29350 for (i = 0; i < n_elts; ++i)
29351 {
29352 x = XVECEXP (vals, 0, i);
29353 if (!(CONST_INT_P (x)
29354 || GET_CODE (x) == CONST_DOUBLE
29355 || GET_CODE (x) == CONST_FIXED))
29356 n_var++, one_var = i;
29357 else if (x != CONST0_RTX (inner_mode))
29358 all_const_zero = false;
29359 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
29360 all_same = false;
29361 }
29362
29363 /* Constants are best loaded from the constant pool. */
29364 if (n_var == 0)
29365 {
29366 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
29367 return;
29368 }
29369
29370 /* If all values are identical, broadcast the value. */
29371 if (all_same
29372 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
29373 XVECEXP (vals, 0, 0)))
29374 return;
29375
29376 /* Values where only one field is non-constant are best loaded from
29377 the pool and overwritten via move later. */
29378 if (n_var == 1)
29379 {
29380 if (all_const_zero
29381 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
29382 XVECEXP (vals, 0, one_var),
29383 one_var))
29384 return;
29385
29386 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
29387 return;
29388 }
29389
29390 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
29391 }
29392
29393 void
29394 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
29395 {
29396 enum machine_mode mode = GET_MODE (target);
29397 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29398 enum machine_mode half_mode;
29399 bool use_vec_merge = false;
29400 rtx tmp;
29401 static rtx (*gen_extract[6][2]) (rtx, rtx)
29402 = {
29403 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
29404 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
29405 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
29406 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
29407 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
29408 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
29409 };
29410 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
29411 = {
29412 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
29413 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
29414 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
29415 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
29416 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
29417 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
29418 };
29419 int i, j, n;
29420
29421 switch (mode)
29422 {
29423 case V2SFmode:
29424 case V2SImode:
29425 if (mmx_ok)
29426 {
29427 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
29428 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
29429 if (elt == 0)
29430 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
29431 else
29432 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
29433 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29434 return;
29435 }
29436 break;
29437
29438 case V2DImode:
29439 use_vec_merge = TARGET_SSE4_1;
29440 if (use_vec_merge)
29441 break;
29442
29443 case V2DFmode:
29444 {
29445 rtx op0, op1;
29446
29447 /* For the two element vectors, we implement a VEC_CONCAT with
29448 the extraction of the other element. */
29449
29450 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
29451 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
29452
29453 if (elt == 0)
29454 op0 = val, op1 = tmp;
29455 else
29456 op0 = tmp, op1 = val;
29457
29458 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
29459 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29460 }
29461 return;
29462
29463 case V4SFmode:
29464 use_vec_merge = TARGET_SSE4_1;
29465 if (use_vec_merge)
29466 break;
29467
29468 switch (elt)
29469 {
29470 case 0:
29471 use_vec_merge = true;
29472 break;
29473
29474 case 1:
29475 /* tmp = target = A B C D */
29476 tmp = copy_to_reg (target);
29477 /* target = A A B B */
29478 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
29479 /* target = X A B B */
29480 ix86_expand_vector_set (false, target, val, 0);
29481 /* target = A X C D */
29482 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29483 const1_rtx, const0_rtx,
29484 GEN_INT (2+4), GEN_INT (3+4)));
29485 return;
29486
29487 case 2:
29488 /* tmp = target = A B C D */
29489 tmp = copy_to_reg (target);
29490 /* tmp = X B C D */
29491 ix86_expand_vector_set (false, tmp, val, 0);
29492 /* target = A B X D */
29493 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29494 const0_rtx, const1_rtx,
29495 GEN_INT (0+4), GEN_INT (3+4)));
29496 return;
29497
29498 case 3:
29499 /* tmp = target = A B C D */
29500 tmp = copy_to_reg (target);
29501 /* tmp = X B C D */
29502 ix86_expand_vector_set (false, tmp, val, 0);
29503 	  /* target = A B C X */
29504 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29505 const0_rtx, const1_rtx,
29506 GEN_INT (2+4), GEN_INT (0+4)));
29507 return;
29508
29509 default:
29510 gcc_unreachable ();
29511 }
29512 break;
29513
29514 case V4SImode:
29515 use_vec_merge = TARGET_SSE4_1;
29516 if (use_vec_merge)
29517 break;
29518
29519 /* Element 0 handled by vec_merge below. */
29520 if (elt == 0)
29521 {
29522 use_vec_merge = true;
29523 break;
29524 }
29525
29526 if (TARGET_SSE2)
29527 {
29528 /* With SSE2, use integer shuffles to swap element 0 and ELT,
29529 store into element 0, then shuffle them back. */
29530
29531 rtx order[4];
29532
29533 order[0] = GEN_INT (elt);
29534 order[1] = const1_rtx;
29535 order[2] = const2_rtx;
29536 order[3] = GEN_INT (3);
29537 order[elt] = const0_rtx;
29538
29539 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29540 order[1], order[2], order[3]));
29541
29542 ix86_expand_vector_set (false, target, val, 0);
29543
29544 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29545 order[1], order[2], order[3]));
29546 }
29547 else
29548 {
29549 /* For SSE1, we have to reuse the V4SF code. */
29550 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
29551 gen_lowpart (SFmode, val), elt);
29552 }
29553 return;
29554
29555 case V8HImode:
29556 use_vec_merge = TARGET_SSE2;
29557 break;
29558 case V4HImode:
29559 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29560 break;
29561
29562 case V16QImode:
29563 use_vec_merge = TARGET_SSE4_1;
29564 break;
29565
29566 case V8QImode:
29567 break;
29568
29569 case V32QImode:
29570 half_mode = V16QImode;
29571 j = 0;
29572 n = 16;
29573 goto half;
29574
29575 case V16HImode:
29576 half_mode = V8HImode;
29577 j = 1;
29578 n = 8;
29579 goto half;
29580
29581 case V8SImode:
29582 half_mode = V4SImode;
29583 j = 2;
29584 n = 4;
29585 goto half;
29586
29587 case V4DImode:
29588 half_mode = V2DImode;
29589 j = 3;
29590 n = 2;
29591 goto half;
29592
29593 case V8SFmode:
29594 half_mode = V4SFmode;
29595 j = 4;
29596 n = 4;
29597 goto half;
29598
29599 case V4DFmode:
29600 half_mode = V2DFmode;
29601 j = 5;
29602 n = 2;
29603 goto half;
29604
29605 half:
29606 /* Compute offset. */
29607 i = elt / n;
29608 elt %= n;
29609
29610 gcc_assert (i <= 1);
29611
29612 /* Extract the half. */
29613 tmp = gen_reg_rtx (half_mode);
29614 emit_insn (gen_extract[j][i] (tmp, target));
29615
29616 /* Put val in tmp at elt. */
29617 ix86_expand_vector_set (false, tmp, val, elt);
29618
29619 /* Put it back. */
29620 emit_insn (gen_insert[j][i] (target, target, tmp));
29621 return;
29622
29623 default:
29624 break;
29625 }
29626
29627 if (use_vec_merge)
29628 {
29629 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
29630 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
29631 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29632 }
29633 else
29634 {
29635 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29636
29637 emit_move_insn (mem, target);
29638
29639 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29640 emit_move_insn (tmp, val);
29641
29642 emit_move_insn (target, mem);
29643 }
29644 }
29645
29646 void
29647 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
29648 {
29649 enum machine_mode mode = GET_MODE (vec);
29650 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29651 bool use_vec_extr = false;
29652 rtx tmp;
29653
29654 switch (mode)
29655 {
29656 case V2SImode:
29657 case V2SFmode:
29658 if (!mmx_ok)
29659 break;
29660 /* FALLTHRU */
29661
29662 case V2DFmode:
29663 case V2DImode:
29664 use_vec_extr = true;
29665 break;
29666
29667 case V4SFmode:
29668 use_vec_extr = TARGET_SSE4_1;
29669 if (use_vec_extr)
29670 break;
29671
29672 switch (elt)
29673 {
29674 case 0:
29675 tmp = vec;
29676 break;
29677
29678 case 1:
29679 case 3:
29680 tmp = gen_reg_rtx (mode);
29681 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
29682 GEN_INT (elt), GEN_INT (elt),
29683 GEN_INT (elt+4), GEN_INT (elt+4)));
29684 break;
29685
29686 case 2:
29687 tmp = gen_reg_rtx (mode);
29688 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
29689 break;
29690
29691 default:
29692 gcc_unreachable ();
29693 }
29694 vec = tmp;
29695 use_vec_extr = true;
29696 elt = 0;
29697 break;
29698
29699 case V4SImode:
29700 use_vec_extr = TARGET_SSE4_1;
29701 if (use_vec_extr)
29702 break;
29703
29704 if (TARGET_SSE2)
29705 {
29706 switch (elt)
29707 {
29708 case 0:
29709 tmp = vec;
29710 break;
29711
29712 case 1:
29713 case 3:
29714 tmp = gen_reg_rtx (mode);
29715 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29716 GEN_INT (elt), GEN_INT (elt),
29717 GEN_INT (elt), GEN_INT (elt)));
29718 break;
29719
29720 case 2:
29721 tmp = gen_reg_rtx (mode);
29722 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
29723 break;
29724
29725 default:
29726 gcc_unreachable ();
29727 }
29728 vec = tmp;
29729 use_vec_extr = true;
29730 elt = 0;
29731 }
29732 else
29733 {
29734 /* For SSE1, we have to reuse the V4SF code. */
29735 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29736 gen_lowpart (V4SFmode, vec), elt);
29737 return;
29738 }
29739 break;
29740
29741 case V8HImode:
29742 use_vec_extr = TARGET_SSE2;
29743 break;
29744 case V4HImode:
29745 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29746 break;
29747
29748 case V16QImode:
29749 use_vec_extr = TARGET_SSE4_1;
29750 break;
29751
29752 case V8QImode:
29753 /* ??? Could extract the appropriate HImode element and shift. */
29754 default:
29755 break;
29756 }
29757
29758 if (use_vec_extr)
29759 {
29760 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29761 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29762
29763 /* Let the rtl optimizers know about the zero extension performed. */
29764 if (inner_mode == QImode || inner_mode == HImode)
29765 {
29766 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29767 target = gen_lowpart (SImode, target);
29768 }
29769
29770 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29771 }
29772 else
29773 {
29774 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29775
29776 emit_move_insn (mem, vec);
29777
29778 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29779 emit_move_insn (target, tmp);
29780 }
29781 }
29782
29783 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29784 pattern to reduce; DEST is the destination; IN is the input vector. */
29785
29786 void
29787 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29788 {
29789 rtx tmp1, tmp2, tmp3;
29790
29791 tmp1 = gen_reg_rtx (V4SFmode);
29792 tmp2 = gen_reg_rtx (V4SFmode);
29793 tmp3 = gen_reg_rtx (V4SFmode);
29794
29795 emit_insn (gen_sse_movhlps (tmp1, in, in));
29796 emit_insn (fn (tmp2, tmp1, in));
29797
29798 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29799 const1_rtx, const1_rtx,
29800 GEN_INT (1+4), GEN_INT (1+4)));
29801 emit_insn (fn (dest, tmp2, tmp3));
29802 }
29803 \f
29804 /* Target hook for scalar_mode_supported_p. */
29805 static bool
29806 ix86_scalar_mode_supported_p (enum machine_mode mode)
29807 {
29808 if (DECIMAL_FLOAT_MODE_P (mode))
29809 return default_decimal_float_supported_p ();
29810 else if (mode == TFmode)
29811 return true;
29812 else
29813 return default_scalar_mode_supported_p (mode);
29814 }
29815
29816 /* Implements target hook vector_mode_supported_p. */
29817 static bool
29818 ix86_vector_mode_supported_p (enum machine_mode mode)
29819 {
29820 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29821 return true;
29822 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29823 return true;
29824 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29825 return true;
29826 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29827 return true;
29828 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29829 return true;
29830 return false;
29831 }
29832
29833 /* Target hook for c_mode_for_suffix. */
29834 static enum machine_mode
29835 ix86_c_mode_for_suffix (char suffix)
29836 {
29837 if (suffix == 'q')
29838 return TFmode;
29839 if (suffix == 'w')
29840 return XFmode;
29841
29842 return VOIDmode;
29843 }
29844
29845 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29846
29847 We do this in the new i386 backend to maintain source compatibility
29848 with the old cc0-based compiler. */
29849
29850 static tree
29851 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29852 tree inputs ATTRIBUTE_UNUSED,
29853 tree clobbers)
29854 {
29855 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29856 clobbers);
29857 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29858 clobbers);
29859 return clobbers;
29860 }
29861
29862 /* Implements the target hook targetm.asm.encode_section_info.  This
29863    is not used by NetWare. */
29864
29865 static void ATTRIBUTE_UNUSED
29866 ix86_encode_section_info (tree decl, rtx rtl, int first)
29867 {
29868 default_encode_section_info (decl, rtl, first);
29869
29870 if (TREE_CODE (decl) == VAR_DECL
29871 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29872 && ix86_in_large_data_p (decl))
29873 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29874 }
29875
29876 /* Worker function for REVERSE_CONDITION. */
29877
29878 enum rtx_code
29879 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29880 {
29881 return (mode != CCFPmode && mode != CCFPUmode
29882 ? reverse_condition (code)
29883 : reverse_condition_maybe_unordered (code));
29884 }
29885
29886 /* Output code to perform an x87 FP register move, from OPERANDS[1]
29887 to OPERANDS[0]. */
29888
29889 const char *
29890 output_387_reg_move (rtx insn, rtx *operands)
29891 {
29892 if (REG_P (operands[0]))
29893 {
29894 if (REG_P (operands[1])
29895 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29896 {
29897 if (REGNO (operands[0]) == FIRST_STACK_REG)
29898 return output_387_ffreep (operands, 0);
29899 return "fstp\t%y0";
29900 }
29901 if (STACK_TOP_P (operands[0]))
29902 return "fld%Z1\t%y1";
29903 return "fst\t%y0";
29904 }
29905 else if (MEM_P (operands[0]))
29906 {
29907 gcc_assert (REG_P (operands[1]));
29908 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29909 return "fstp%Z0\t%y0";
29910 else
29911 {
29912 /* There is no non-popping store to memory for XFmode.
29913 So if we need one, follow the store with a load. */
29914 if (GET_MODE (operands[0]) == XFmode)
29915 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29916 else
29917 return "fst%Z0\t%y0";
29918 }
29919 }
29920 else
29921     gcc_unreachable ();
29922 }
29923
29924 /* Output code to perform a conditional jump to LABEL, if C2 flag in
29925 FP status register is set. */
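/* After fnstsw, C2 (bit 10 of the FP status word) sits in bit 2 of %ah,
   which is why the non-SAHF path below tests 0x04; with SAHF it is copied
   into the parity flag, which the UNORDERED condition matches.  */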
29926
29927 void
29928 ix86_emit_fp_unordered_jump (rtx label)
29929 {
29930 rtx reg = gen_reg_rtx (HImode);
29931 rtx temp;
29932
29933 emit_insn (gen_x86_fnstsw_1 (reg));
29934
29935 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29936 {
29937 emit_insn (gen_x86_sahf_1 (reg));
29938
29939 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29940 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29941 }
29942 else
29943 {
29944 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29945
29946 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29947 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29948 }
29949
29950 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29951 gen_rtx_LABEL_REF (VOIDmode, label),
29952 pc_rtx);
29953 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29954
29955 emit_jump_insn (temp);
29956 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29957 }
29958
29959 /* Output code to perform a log1p XFmode calculation. */
29960
29961 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29962 {
29963 rtx label1 = gen_label_rtx ();
29964 rtx label2 = gen_label_rtx ();
29965
29966 rtx tmp = gen_reg_rtx (XFmode);
29967 rtx tmp2 = gen_reg_rtx (XFmode);
29968 rtx test;
29969
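  /* The i387 fyl2xp1 instruction is only specified for arguments with
     |op1| below 1 - sqrt(2)/2 (about 0.29289), so above that threshold we
     fall back to computing log2 (1 + op1) with fyl2x instead.  */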
29970 emit_insn (gen_absxf2 (tmp, op1));
29971 test = gen_rtx_GE (VOIDmode, tmp,
29972 CONST_DOUBLE_FROM_REAL_VALUE (
29973 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29974 XFmode));
29975 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29976
29977 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29978 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29979 emit_jump (label2);
29980
29981 emit_label (label1);
29982 emit_move_insn (tmp, CONST1_RTX (XFmode));
29983 emit_insn (gen_addxf3 (tmp, op1, tmp));
29984 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29985 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29986
29987 emit_label (label2);
29988 }
29989
29990 /* Output code to perform a Newton-Raphson approximation of a single precision
29991    floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29992
29993 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29994 {
29995 rtx x0, x1, e0, e1, two;
29996
29997 x0 = gen_reg_rtx (mode);
29998 e0 = gen_reg_rtx (mode);
29999 e1 = gen_reg_rtx (mode);
30000 x1 = gen_reg_rtx (mode);
30001
30002 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
30003
30004 if (VECTOR_MODE_P (mode))
30005 two = ix86_build_const_vector (SFmode, true, two);
30006
30007 two = force_reg (mode, two);
30008
30009 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
30010
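  /* This is a single Newton-Raphson step refining the hardware rcp
     estimate: for f (x) = 1/x - b the iteration is x1 = x0 * (2 - b * x0),
     and multiplying by a yields the quotient.  */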
30011 /* x0 = rcp(b) estimate */
30012 emit_insn (gen_rtx_SET (VOIDmode, x0,
30013 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
30014 UNSPEC_RCP)));
30015 /* e0 = x0 * a */
30016 emit_insn (gen_rtx_SET (VOIDmode, e0,
30017 gen_rtx_MULT (mode, x0, a)));
30018 /* e1 = x0 * b */
30019 emit_insn (gen_rtx_SET (VOIDmode, e1,
30020 gen_rtx_MULT (mode, x0, b)));
30021 /* x1 = 2. - e1 */
30022 emit_insn (gen_rtx_SET (VOIDmode, x1,
30023 gen_rtx_MINUS (mode, two, e1)));
30024 /* res = e0 * x1 */
30025 emit_insn (gen_rtx_SET (VOIDmode, res,
30026 gen_rtx_MULT (mode, e0, x1)));
30027 }
30028
30029 /* Output code to perform a Newton-Raphson approximation of a
30030    single precision floating point [reciprocal] square root. */
30031
30032 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
30033 bool recip)
30034 {
30035 rtx x0, e0, e1, e2, e3, mthree, mhalf;
30036 REAL_VALUE_TYPE r;
30037
30038 x0 = gen_reg_rtx (mode);
30039 e0 = gen_reg_rtx (mode);
30040 e1 = gen_reg_rtx (mode);
30041 e2 = gen_reg_rtx (mode);
30042 e3 = gen_reg_rtx (mode);
30043
30044 real_from_integer (&r, VOIDmode, -3, -1, 0);
30045 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30046
30047 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
30048 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30049
30050 if (VECTOR_MODE_P (mode))
30051 {
30052 mthree = ix86_build_const_vector (SFmode, true, mthree);
30053 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
30054 }
30055
30056 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
30057 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
30058
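  /* A single Newton-Raphson step for f (x) = 1/(x*x) - a, i.e.
     x1 = x0 * (3 - a * x0 * x0) / 2, written with negated constants so the
     final multiply uses -0.5.  */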
30059 /* x0 = rsqrt(a) estimate */
30060 emit_insn (gen_rtx_SET (VOIDmode, x0,
30061 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
30062 UNSPEC_RSQRT)));
30063
30064   /* If a == 0.0, mask out the infinite rsqrt estimate to prevent a NaN result for sqrt (0.0). */
30065 if (!recip)
30066 {
30067 rtx zero, mask;
30068
30069 zero = gen_reg_rtx (mode);
30070 mask = gen_reg_rtx (mode);
30071
30072 zero = force_reg (mode, CONST0_RTX(mode));
30073 emit_insn (gen_rtx_SET (VOIDmode, mask,
30074 gen_rtx_NE (mode, zero, a)));
30075
30076 emit_insn (gen_rtx_SET (VOIDmode, x0,
30077 gen_rtx_AND (mode, x0, mask)));
30078 }
30079
30080 /* e0 = x0 * a */
30081 emit_insn (gen_rtx_SET (VOIDmode, e0,
30082 gen_rtx_MULT (mode, x0, a)));
30083 /* e1 = e0 * x0 */
30084 emit_insn (gen_rtx_SET (VOIDmode, e1,
30085 gen_rtx_MULT (mode, e0, x0)));
30086
30087 /* e2 = e1 - 3. */
30088 mthree = force_reg (mode, mthree);
30089 emit_insn (gen_rtx_SET (VOIDmode, e2,
30090 gen_rtx_PLUS (mode, e1, mthree)));
30091
30092 mhalf = force_reg (mode, mhalf);
30093 if (recip)
30094 /* e3 = -.5 * x0 */
30095 emit_insn (gen_rtx_SET (VOIDmode, e3,
30096 gen_rtx_MULT (mode, x0, mhalf)));
30097 else
30098 /* e3 = -.5 * e0 */
30099 emit_insn (gen_rtx_SET (VOIDmode, e3,
30100 gen_rtx_MULT (mode, e0, mhalf)));
30101 /* ret = e2 * e3 */
30102 emit_insn (gen_rtx_SET (VOIDmode, res,
30103 gen_rtx_MULT (mode, e2, e3)));
30104 }
30105
30106 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
30107
30108 static void ATTRIBUTE_UNUSED
30109 i386_solaris_elf_named_section (const char *name, unsigned int flags,
30110 tree decl)
30111 {
30112 /* With Binutils 2.15, the "@unwind" marker must be specified on
30113 every occurrence of the ".eh_frame" section, not just the first
30114 one. */
30115 if (TARGET_64BIT
30116 && strcmp (name, ".eh_frame") == 0)
30117 {
30118 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
30119 flags & SECTION_WRITE ? "aw" : "a");
30120 return;
30121 }
30122 default_elf_asm_named_section (name, flags, decl);
30123 }
30124
30125 /* Return the mangling of TYPE if it is an extended fundamental type. */
30126
30127 static const char *
30128 ix86_mangle_type (const_tree type)
30129 {
30130 type = TYPE_MAIN_VARIANT (type);
30131
30132 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
30133 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
30134 return NULL;
30135
30136 switch (TYPE_MODE (type))
30137 {
30138 case TFmode:
30139 /* __float128 is "g". */
30140 return "g";
30141 case XFmode:
30142 /* "long double" or __float80 is "e". */
30143 return "e";
30144 default:
30145 return NULL;
30146 }
30147 }
30148
30149 /* For 32-bit code we can save PIC register setup by using
30150 __stack_chk_fail_local hidden function instead of calling
30151 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
30152 register, so it is better to call __stack_chk_fail directly. */
30153
30154 static tree
30155 ix86_stack_protect_fail (void)
30156 {
30157 return TARGET_64BIT
30158 ? default_external_stack_protect_fail ()
30159 : default_hidden_stack_protect_fail ();
30160 }
30161
30162 /* Select a format to encode pointers in exception handling data. CODE
30163 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
30164 true if the symbol may be affected by dynamic relocations.
30165
30166 ??? All x86 object file formats are capable of representing this.
30167 After all, the relocation needed is the same as for the call insn.
30168 Whether or not a particular assembler allows us to enter such, I
30169 guess we'll have to see. */
30170 int
30171 asm_preferred_eh_data_format (int code, int global)
30172 {
30173 if (flag_pic)
30174 {
30175 int type = DW_EH_PE_sdata8;
30176 if (!TARGET_64BIT
30177 || ix86_cmodel == CM_SMALL_PIC
30178 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
30179 type = DW_EH_PE_sdata4;
30180 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
30181 }
30182 if (ix86_cmodel == CM_SMALL
30183 || (ix86_cmodel == CM_MEDIUM && code))
30184 return DW_EH_PE_udata4;
30185 return DW_EH_PE_absptr;
30186 }
30187 \f
30188 /* Expand copysign from SIGN to the positive value ABS_VALUE,
30189    storing the result in RESULT.  If MASK is non-null, it shall be a mask
30190    that masks out the sign bit. */
30191 static void
30192 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
30193 {
30194 enum machine_mode mode = GET_MODE (sign);
30195 rtx sgn = gen_reg_rtx (mode);
30196 if (mask == NULL_RTX)
30197 {
30198 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
30199 if (!VECTOR_MODE_P (mode))
30200 {
30201 /* We need to generate a scalar mode mask in this case. */
30202 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30203 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30204 mask = gen_reg_rtx (mode);
30205 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30206 }
30207 }
30208 else
30209 mask = gen_rtx_NOT (mode, mask);
30210 emit_insn (gen_rtx_SET (VOIDmode, sgn,
30211 gen_rtx_AND (mode, mask, sign)));
30212 emit_insn (gen_rtx_SET (VOIDmode, result,
30213 gen_rtx_IOR (mode, abs_value, sgn)));
30214 }
30215
30216 /* Expand fabs (OP0) and return a new rtx that holds the result. The
30217 mask for masking out the sign-bit is stored in *SMASK, if that is
30218 non-null. */
30219 static rtx
30220 ix86_expand_sse_fabs (rtx op0, rtx *smask)
30221 {
30222 enum machine_mode mode = GET_MODE (op0);
30223 rtx xa, mask;
30224
30225 xa = gen_reg_rtx (mode);
30226 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
30227 if (!VECTOR_MODE_P (mode))
30228 {
30229 /* We need to generate a scalar mode mask in this case. */
30230 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30231 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30232 mask = gen_reg_rtx (mode);
30233 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30234 }
30235 emit_insn (gen_rtx_SET (VOIDmode, xa,
30236 gen_rtx_AND (mode, op0, mask)));
30237
30238 if (smask)
30239 *smask = mask;
30240
30241 return xa;
30242 }
30243
30244 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
30245 swapping the operands if SWAP_OPERANDS is true. The expanded
30246 code is a forward jump to a newly created label in case the
30247 comparison is true. The generated label rtx is returned. */
30248 static rtx
30249 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
30250 bool swap_operands)
30251 {
30252 rtx label, tmp;
30253
30254 if (swap_operands)
30255 {
30256 tmp = op0;
30257 op0 = op1;
30258 op1 = tmp;
30259 }
30260
30261 label = gen_label_rtx ();
30262 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
30263 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30264 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
30265 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
30266 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
30267 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
30268 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
30269 JUMP_LABEL (tmp) = label;
30270
30271 return label;
30272 }
30273
30274 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
30275 using comparison code CODE. Operands are swapped for the comparison if
30276 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
30277 static rtx
30278 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
30279 bool swap_operands)
30280 {
30281 enum machine_mode mode = GET_MODE (op0);
30282 rtx mask = gen_reg_rtx (mode);
30283
30284 if (swap_operands)
30285 {
30286 rtx tmp = op0;
30287 op0 = op1;
30288 op1 = tmp;
30289 }
30290
30291 if (mode == DFmode)
30292 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
30293 gen_rtx_fmt_ee (code, mode, op0, op1)));
30294 else
30295 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
30296 gen_rtx_fmt_ee (code, mode, op0, op1)));
30297
30298 return mask;
30299 }
30300
30301 /* Generate and return an rtx of mode MODE for 2**n where n is the number
30302    of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
30303 static rtx
30304 ix86_gen_TWO52 (enum machine_mode mode)
30305 {
30306 REAL_VALUE_TYPE TWO52r;
30307 rtx TWO52;
30308
30309 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
30310 TWO52 = const_double_from_real_value (TWO52r, mode);
30311 TWO52 = force_reg (mode, TWO52);
30312
30313 return TWO52;
30314 }
30315
30316 /* Expand SSE sequence for computing lround from OP1 storing
30317 into OP0. */
30318 void
30319 ix86_expand_lround (rtx op0, rtx op1)
30320 {
30321 /* C code for the stuff we're doing below:
30322 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
30323 return (long)tmp;
30324 */
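  /* Using the largest representable value below 0.5 (rather than 0.5 itself)
     keeps op1 + adj from rounding up to the next integer when op1 is just
     below a half-way point.  */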
30325 enum machine_mode mode = GET_MODE (op1);
30326 const struct real_format *fmt;
30327 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30328 rtx adj;
30329
30330 /* load nextafter (0.5, 0.0) */
30331 fmt = REAL_MODE_FORMAT (mode);
30332 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30333 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30334
30335 /* adj = copysign (0.5, op1) */
30336 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
30337 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
30338
30339 /* adj = op1 + adj */
30340 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
30341
30342 /* op0 = (imode)adj */
30343 expand_fix (op0, adj, 0);
30344 }
30345
30346 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
30347 into OPERAND0. */
30348 void
30349 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
30350 {
30351 /* C code for the stuff we're doing below (for do_floor):
30352 xi = (long)op1;
30353 xi -= (double)xi > op1 ? 1 : 0;
30354 return xi;
30355 */
30356 enum machine_mode fmode = GET_MODE (op1);
30357 enum machine_mode imode = GET_MODE (op0);
30358 rtx ireg, freg, label, tmp;
30359
30360 /* reg = (long)op1 */
30361 ireg = gen_reg_rtx (imode);
30362 expand_fix (ireg, op1, 0);
30363
30364 /* freg = (double)reg */
30365 freg = gen_reg_rtx (fmode);
30366 expand_float (freg, ireg, 0);
30367
30368 /* ireg = (freg > op1) ? ireg - 1 : ireg */
30369 label = ix86_expand_sse_compare_and_jump (UNLE,
30370 freg, op1, !do_floor);
30371 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
30372 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
30373 emit_move_insn (ireg, tmp);
30374
30375 emit_label (label);
30376 LABEL_NUSES (label) = 1;
30377
30378 emit_move_insn (op0, ireg);
30379 }
30380
30381 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
30382 result in OPERAND0. */
30383 void
30384 ix86_expand_rint (rtx operand0, rtx operand1)
30385 {
30386 /* C code for the stuff we're doing below:
30387 xa = fabs (operand1);
30388 if (!isless (xa, 2**52))
30389 return operand1;
30390 xa = xa + 2**52 - 2**52;
30391 return copysign (xa, operand1);
30392 */
30393 enum machine_mode mode = GET_MODE (operand0);
30394 rtx res, xa, label, TWO52, mask;
30395
30396 res = gen_reg_rtx (mode);
30397 emit_move_insn (res, operand1);
30398
30399 /* xa = abs (operand1) */
30400 xa = ix86_expand_sse_fabs (res, &mask);
30401
30402 /* if (!isless (xa, TWO52)) goto label; */
30403 TWO52 = ix86_gen_TWO52 (mode);
30404 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30405
30406 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30407 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30408
30409 ix86_sse_copysign_to_positive (res, xa, res, mask);
30410
30411 emit_label (label);
30412 LABEL_NUSES (label) = 1;
30413
30414 emit_move_insn (operand0, res);
30415 }
30416
30417 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
30418 into OPERAND0, without relying on DImode truncation via cvttsd2siq (64-bit only). */
30419 void
30420 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
30421 {
30422 /* C code for the stuff we expand below.
30423 double xa = fabs (x), x2;
30424 if (!isless (xa, TWO52))
30425 return x;
30426 xa = xa + TWO52 - TWO52;
30427 x2 = copysign (xa, x);
30428 Compensate. Floor:
30429 if (x2 > x)
30430 x2 -= 1;
30431 Compensate. Ceil:
30432 if (x2 < x)
30433 x2 -= -1;
30434 return x2;
30435 */
30436 enum machine_mode mode = GET_MODE (operand0);
30437 rtx xa, TWO52, tmp, label, one, res, mask;
30438
30439 TWO52 = ix86_gen_TWO52 (mode);
30440
30441 /* Temporary for holding the result, initialized to the input
30442 operand to ease control flow. */
30443 res = gen_reg_rtx (mode);
30444 emit_move_insn (res, operand1);
30445
30446 /* xa = abs (operand1) */
30447 xa = ix86_expand_sse_fabs (res, &mask);
30448
30449 /* if (!isless (xa, TWO52)) goto label; */
30450 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30451
30452 /* xa = xa + TWO52 - TWO52; */
30453 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30454 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30455
30456 /* xa = copysign (xa, operand1) */
30457 ix86_sse_copysign_to_positive (xa, xa, res, mask);
30458
30459 /* generate 1.0 or -1.0 */
30460 one = force_reg (mode,
30461 const_double_from_real_value (do_floor
30462 ? dconst1 : dconstm1, mode));
30463
30464 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30465 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30466 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30467 gen_rtx_AND (mode, one, tmp)));
30468 /* We always need to subtract here to preserve signed zero. */
30469 tmp = expand_simple_binop (mode, MINUS,
30470 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30471 emit_move_insn (res, tmp);
30472
30473 emit_label (label);
30474 LABEL_NUSES (label) = 1;
30475
30476 emit_move_insn (operand0, res);
30477 }
30478
30479 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
30480 into OPERAND0. */
30481 void
30482 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
30483 {
30484 /* C code for the stuff we expand below.
30485 double xa = fabs (x), x2;
30486 if (!isless (xa, TWO52))
30487 return x;
30488 x2 = (double)(long)x;
30489 Compensate. Floor:
30490 if (x2 > x)
30491 x2 -= 1;
30492 Compensate. Ceil:
30493 if (x2 < x)
30494 x2 += 1;
30495 if (HONOR_SIGNED_ZEROS (mode))
30496 return copysign (x2, x);
30497 return x2;
30498 */
30499 enum machine_mode mode = GET_MODE (operand0);
30500 rtx xa, xi, TWO52, tmp, label, one, res, mask;
30501
30502 TWO52 = ix86_gen_TWO52 (mode);
30503
30504 /* Temporary for holding the result, initialized to the input
30505 operand to ease control flow. */
30506 res = gen_reg_rtx (mode);
30507 emit_move_insn (res, operand1);
30508
30509 /* xa = abs (operand1) */
30510 xa = ix86_expand_sse_fabs (res, &mask);
30511
30512 /* if (!isless (xa, TWO52)) goto label; */
30513 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30514
30515 /* xa = (double)(long)x */
30516 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30517 expand_fix (xi, res, 0);
30518 expand_float (xa, xi, 0);
30519
30520 /* generate 1.0 */
30521 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30522
30523 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30524 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30525 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30526 gen_rtx_AND (mode, one, tmp)));
30527 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
30528 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30529 emit_move_insn (res, tmp);
30530
30531 if (HONOR_SIGNED_ZEROS (mode))
30532 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30533
30534 emit_label (label);
30535 LABEL_NUSES (label) = 1;
30536
30537 emit_move_insn (operand0, res);
30538 }
30539
30540 /* Expand SSE sequence for computing round from OPERAND1 storing
30541 into OPERAND0. Sequence that works without relying on DImode truncation
30542 via cvttsd2siq, which is only available on 64-bit targets. */
30543 void
30544 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
30545 {
30546 /* C code for the stuff we expand below.
30547 double xa = fabs (x), xa2, x2;
30548 if (!isless (xa, TWO52))
30549 return x;
30550 Using the absolute value and copying back sign makes
30551 -0.0 -> -0.0 correct.
30552 xa2 = xa + TWO52 - TWO52;
30553 Compensate.
30554 dxa = xa2 - xa;
30555 if (dxa <= -0.5)
30556 xa2 += 1;
30557 else if (dxa > 0.5)
30558 xa2 -= 1;
30559 x2 = copysign (xa2, x);
30560 return x2;
30561 */
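/* For example, for x = 2.5: xa2 = 2.5 + 2**52 - 2**52 = 2.0 (round to
   nearest even), so dxa = -0.5; the dxa <= -0.5 compensation then adds 1,
   giving 3.0, which is round (2.5) with halfway cases away from zero.  */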
30562 enum machine_mode mode = GET_MODE (operand0);
30563 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
30564
30565 TWO52 = ix86_gen_TWO52 (mode);
30566
30567 /* Temporary for holding the result, initialized to the input
30568 operand to ease control flow. */
30569 res = gen_reg_rtx (mode);
30570 emit_move_insn (res, operand1);
30571
30572 /* xa = abs (operand1) */
30573 xa = ix86_expand_sse_fabs (res, &mask);
30574
30575 /* if (!isless (xa, TWO52)) goto label; */
30576 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30577
30578 /* xa2 = xa + TWO52 - TWO52; */
30579 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30580 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
30581
30582 /* dxa = xa2 - xa; */
30583 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
30584
30585 /* generate 0.5, 1.0 and -0.5 */
30586 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
30587 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
30588 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
30589 0, OPTAB_DIRECT);
30590
30591 /* Compensate. */
30592 tmp = gen_reg_rtx (mode);
30593 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
30594 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
30595 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30596 gen_rtx_AND (mode, one, tmp)));
30597 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30598 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
30599 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
30600 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30601 gen_rtx_AND (mode, one, tmp)));
30602 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30603
30604 /* res = copysign (xa2, operand1) */
30605 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
30606
30607 emit_label (label);
30608 LABEL_NUSES (label) = 1;
30609
30610 emit_move_insn (operand0, res);
30611 }
30612
30613 /* Expand SSE sequence for computing trunc from OPERAND1 storing
30614 into OPERAND0. */
30615 void
30616 ix86_expand_trunc (rtx operand0, rtx operand1)
30617 {
30618 /* C code for SSE variant we expand below.
30619 double xa = fabs (x), x2;
30620 if (!isless (xa, TWO52))
30621 return x;
30622 x2 = (double)(long)x;
30623 if (HONOR_SIGNED_ZEROS (mode))
30624 return copysign (x2, x);
30625 return x2;
30626 */
30627 enum machine_mode mode = GET_MODE (operand0);
30628 rtx xa, xi, TWO52, label, res, mask;
30629
30630 TWO52 = ix86_gen_TWO52 (mode);
30631
30632 /* Temporary for holding the result, initialized to the input
30633 operand to ease control flow. */
30634 res = gen_reg_rtx (mode);
30635 emit_move_insn (res, operand1);
30636
30637 /* xa = abs (operand1) */
30638 xa = ix86_expand_sse_fabs (res, &mask);
30639
30640 /* if (!isless (xa, TWO52)) goto label; */
30641 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30642
30643 /* x = (double)(long)x */
30644 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30645 expand_fix (xi, res, 0);
30646 expand_float (res, xi, 0);
30647
30648 if (HONOR_SIGNED_ZEROS (mode))
30649 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30650
30651 emit_label (label);
30652 LABEL_NUSES (label) = 1;
30653
30654 emit_move_insn (operand0, res);
30655 }
30656
30657 /* Expand SSE sequence for computing trunc from OPERAND1 storing
30658 into OPERAND0, without relying on DImode truncation via cvttsd2siq (64-bit only). */
30659 void
30660 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
30661 {
30662 enum machine_mode mode = GET_MODE (operand0);
30663 rtx xa, mask, TWO52, label, one, res, smask, tmp;
30664
30665 /* C code for SSE variant we expand below.
30666 double xa = fabs (x), x2;
30667 if (!isless (xa, TWO52))
30668 return x;
30669 xa2 = xa + TWO52 - TWO52;
30670 Compensate:
30671 if (xa2 > xa)
30672 xa2 -= 1.0;
30673 x2 = copysign (xa2, x);
30674 return x2;
30675 */
30676
30677 TWO52 = ix86_gen_TWO52 (mode);
30678
30679 /* Temporary for holding the result, initialized to the input
30680 operand to ease control flow. */
30681 res = gen_reg_rtx (mode);
30682 emit_move_insn (res, operand1);
30683
30684 /* xa = abs (operand1) */
30685 xa = ix86_expand_sse_fabs (res, &smask);
30686
30687 /* if (!isless (xa, TWO52)) goto label; */
30688 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30689
30690 /* res = xa + TWO52 - TWO52; */
30691 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30692 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
30693 emit_move_insn (res, tmp);
30694
30695 /* generate 1.0 */
30696 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30697
30698 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
30699 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
30700 emit_insn (gen_rtx_SET (VOIDmode, mask,
30701 gen_rtx_AND (mode, mask, one)));
30702 tmp = expand_simple_binop (mode, MINUS,
30703 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
30704 emit_move_insn (res, tmp);
30705
30706 /* res = copysign (res, operand1) */
30707 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
30708
30709 emit_label (label);
30710 LABEL_NUSES (label) = 1;
30711
30712 emit_move_insn (operand0, res);
30713 }
30714
30715 /* Expand SSE sequence for computing round from OPERAND1 storing
30716 into OPERAND0. */
30717 void
30718 ix86_expand_round (rtx operand0, rtx operand1)
30719 {
30720 /* C code for the stuff we're doing below:
30721 double xa = fabs (x);
30722 if (!isless (xa, TWO52))
30723 return x;
30724 xa = (double)(long)(xa + nextafter (0.5, 0.0));
30725 return copysign (xa, x);
30726 */
30727 enum machine_mode mode = GET_MODE (operand0);
30728 rtx res, TWO52, xa, label, xi, half, mask;
30729 const struct real_format *fmt;
30730 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30731
30732 /* Temporary for holding the result, initialized to the input
30733 operand to ease control flow. */
30734 res = gen_reg_rtx (mode);
30735 emit_move_insn (res, operand1);
30736
30737 TWO52 = ix86_gen_TWO52 (mode);
30738 xa = ix86_expand_sse_fabs (res, &mask);
30739 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30740
30741 /* load nextafter (0.5, 0.0) */
30742 fmt = REAL_MODE_FORMAT (mode);
30743 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30744 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30745
30746 /* xa = xa + 0.5 */
30747 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
30748 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
30749
30750 /* xa = (double)(int64_t)xa */
30751 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30752 expand_fix (xi, xa, 0);
30753 expand_float (xa, xi, 0);
30754
30755 /* res = copysign (xa, operand1) */
30756 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
30757
30758 emit_label (label);
30759 LABEL_NUSES (label) = 1;
30760
30761 emit_move_insn (operand0, res);
30762 }
30763 \f
30764
30765 /* Table of valid machine attributes. */
30766 static const struct attribute_spec ix86_attribute_table[] =
30767 {
30768 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30769 /* Stdcall attribute says callee is responsible for popping arguments
30770 if they are not variable. */
30771 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30772 /* Fastcall attribute says callee is responsible for popping arguments
30773 if they are not variable. */
30774 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30775 /* Thiscall attribute says callee is responsible for popping arguments
30776 if they are not variable. */
30777 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30778 /* Cdecl attribute says the callee is a normal C declaration */
30779 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30780 /* Regparm attribute specifies how many integer arguments are to be
30781 passed in registers. */
30782 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30783 /* Sseregparm attribute says we are using x86_64 calling conventions
30784 for FP arguments. */
30785 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30786 /* force_align_arg_pointer says this function realigns the stack at entry. */
30787 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30788 false, true, true, ix86_handle_cconv_attribute },
30789 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30790 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30791 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30792 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30793 #endif
30794 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30795 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30796 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30797 SUBTARGET_ATTRIBUTE_TABLE,
30798 #endif
30799 /* ms_abi and sysv_abi calling convention function attributes. */
30800 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30801 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30802 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
30803 /* End element. */
30804 { NULL, 0, 0, false, false, false, NULL }
30805 };
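
/* For example, a declaration such as
     int __attribute__ ((__regparm__ (2))) f (int a, int b);
   is checked through this table, here via ix86_handle_cconv_attribute.  */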
30806
30807 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30808 static int
30809 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
30810 tree vectype ATTRIBUTE_UNUSED,
30811 int misalign ATTRIBUTE_UNUSED)
30812 {
30813 switch (type_of_cost)
30814 {
30815 case scalar_stmt:
30816 return ix86_cost->scalar_stmt_cost;
30817
30818 case scalar_load:
30819 return ix86_cost->scalar_load_cost;
30820
30821 case scalar_store:
30822 return ix86_cost->scalar_store_cost;
30823
30824 case vector_stmt:
30825 return ix86_cost->vec_stmt_cost;
30826
30827 case vector_load:
30828 return ix86_cost->vec_align_load_cost;
30829
30830 case vector_store:
30831 return ix86_cost->vec_store_cost;
30832
30833 case vec_to_scalar:
30834 return ix86_cost->vec_to_scalar_cost;
30835
30836 case scalar_to_vec:
30837 return ix86_cost->scalar_to_vec_cost;
30838
30839 case unaligned_load:
30840 case unaligned_store:
30841 return ix86_cost->vec_unalign_load_cost;
30842
30843 case cond_branch_taken:
30844 return ix86_cost->cond_taken_branch_cost;
30845
30846 case cond_branch_not_taken:
30847 return ix86_cost->cond_not_taken_branch_cost;
30848
30849 case vec_perm:
30850 return 1;
30851
30852 default:
30853 gcc_unreachable ();
30854 }
30855 }
30856
30857
30858 /* Implement targetm.vectorize.builtin_vec_perm. */
30859
30860 static tree
30861 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
30862 {
30863 tree itype = TREE_TYPE (vec_type);
30864 bool u = TYPE_UNSIGNED (itype);
30865 enum machine_mode vmode = TYPE_MODE (vec_type);
30866 enum ix86_builtins fcode;
30867 bool ok = TARGET_SSE2;
30868
30869 switch (vmode)
30870 {
30871 case V4DFmode:
30872 ok = TARGET_AVX;
30873 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
30874 goto get_di;
30875 case V2DFmode:
30876 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
30877 get_di:
30878 itype = ix86_get_builtin_type (IX86_BT_DI);
30879 break;
30880
30881 case V8SFmode:
30882 ok = TARGET_AVX;
30883 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
30884 goto get_si;
30885 case V4SFmode:
30886 ok = TARGET_SSE;
30887 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
30888 get_si:
30889 itype = ix86_get_builtin_type (IX86_BT_SI);
30890 break;
30891
30892 case V2DImode:
30893 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
30894 break;
30895 case V4SImode:
30896 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
30897 break;
30898 case V8HImode:
30899 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
30900 break;
30901 case V16QImode:
30902 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
30903 break;
30904 default:
30905 ok = false;
30906 break;
30907 }
30908
30909 if (!ok)
30910 return NULL_TREE;
30911
30912 *mask_type = itype;
30913 return ix86_builtins[(int) fcode];
30914 }
30915
30916 /* Return a vector mode with twice as many elements as VMODE. */
30917 /* ??? Consider moving this to a table generated by genmodes.c. */
30918
30919 static enum machine_mode
30920 doublesize_vector_mode (enum machine_mode vmode)
30921 {
30922 switch (vmode)
30923 {
30924 case V2SFmode: return V4SFmode;
30925 case V1DImode: return V2DImode;
30926 case V2SImode: return V4SImode;
30927 case V4HImode: return V8HImode;
30928 case V8QImode: return V16QImode;
30929
30930 case V2DFmode: return V4DFmode;
30931 case V4SFmode: return V8SFmode;
30932 case V2DImode: return V4DImode;
30933 case V4SImode: return V8SImode;
30934 case V8HImode: return V16HImode;
30935 case V16QImode: return V32QImode;
30936
30937 case V4DFmode: return V8DFmode;
30938 case V8SFmode: return V16SFmode;
30939 case V4DImode: return V8DImode;
30940 case V8SImode: return V16SImode;
30941 case V16HImode: return V32HImode;
30942 case V32QImode: return V64QImode;
30943
30944 default:
30945 gcc_unreachable ();
30946 }
30947 }
30948
30949 /* Construct (set target (vec_select op0 (parallel perm))) and
30950 return true if that's a valid instruction in the active ISA. */
30951
30952 static bool
30953 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
30954 {
30955 rtx rperm[MAX_VECT_LEN], x;
30956 unsigned i;
30957
30958 for (i = 0; i < nelt; ++i)
30959 rperm[i] = GEN_INT (perm[i]);
30960
30961 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
30962 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
30963 x = gen_rtx_SET (VOIDmode, target, x);
30964
30965 x = emit_insn (x);
30966 if (recog_memoized (x) < 0)
30967 {
30968 remove_insn (x);
30969 return false;
30970 }
30971 return true;
30972 }
30973
30974 /* Similar, but generate a vec_concat from op0 and op1 as well. */
30975
30976 static bool
30977 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
30978 const unsigned char *perm, unsigned nelt)
30979 {
30980 enum machine_mode v2mode;
30981 rtx x;
30982
30983 v2mode = doublesize_vector_mode (GET_MODE (op0));
30984 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
30985 return expand_vselect (target, x, perm, nelt);
30986 }
30987
30988 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30989 in terms of blendp[sd] / pblendw / pblendvb. */
30990
30991 static bool
30992 expand_vec_perm_blend (struct expand_vec_perm_d *d)
30993 {
30994 enum machine_mode vmode = d->vmode;
30995 unsigned i, mask, nelt = d->nelt;
30996 rtx target, op0, op1, x;
30997
30998 if (!TARGET_SSE4_1 || d->op0 == d->op1)
30999 return false;
31000 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
31001 return false;
31002
31003 /* This is a blend, not a permute. Elements must stay in their
31004 respective lanes. */
31005 for (i = 0; i < nelt; ++i)
31006 {
31007 unsigned e = d->perm[i];
31008 if (!(e == i || e == i + nelt))
31009 return false;
31010 }
31011
31012 if (d->testing_p)
31013 return true;
31014
31015 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
31016 decision should be extracted elsewhere, so that we only try that
31017 sequence once all budget==3 options have been tried. */
31018
31019 /* For bytes, see if bytes move in pairs so we can use pblendw with
31020 an immediate argument, rather than pblendvb with a vector argument. */
31021 if (vmode == V16QImode)
31022 {
31023 bool pblendw_ok = true;
31024 for (i = 0; i < 16 && pblendw_ok; i += 2)
31025 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
31026
31027 if (!pblendw_ok)
31028 {
31029 rtx rperm[16], vperm;
31030
31031 for (i = 0; i < nelt; ++i)
31032 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
31033
31034 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31035 vperm = force_reg (V16QImode, vperm);
31036
31037 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
31038 return true;
31039 }
31040 }
31041
31042 target = d->target;
31043 op0 = d->op0;
31044 op1 = d->op1;
31045 mask = 0;
31046
31047 switch (vmode)
31048 {
31049 case V4DFmode:
31050 case V8SFmode:
31051 case V2DFmode:
31052 case V4SFmode:
31053 case V8HImode:
31054 for (i = 0; i < nelt; ++i)
31055 mask |= (d->perm[i] >= nelt) << i;
31056 break;
31057
31058 case V2DImode:
31059 for (i = 0; i < 2; ++i)
31060 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
31061 goto do_subreg;
31062
31063 case V4SImode:
31064 for (i = 0; i < 4; ++i)
31065 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
31066 goto do_subreg;
31067
31068 case V16QImode:
31069 for (i = 0; i < 8; ++i)
31070 mask |= (d->perm[i * 2] >= 16) << i;
31071
31072 do_subreg:
31073 vmode = V8HImode;
31074 target = gen_lowpart (vmode, target);
31075 op0 = gen_lowpart (vmode, op0);
31076 op1 = gen_lowpart (vmode, op1);
31077 break;
31078
31079 default:
31080 gcc_unreachable ();
31081 }
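
/* A set bit i in MASK selects element i of the result from op1 in the
   VEC_MERGE below.  For the V2DI, V4SI and V16QI cases the operands were
   lowered to V8HImode above, so each V2DI element contributes 4 bits,
   each V4SI element 2 bits, and each pair of V16QI elements 1 bit of the
   pblendw-style immediate.  */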
31082
31083 /* This matches five different patterns, depending on the mode. */
31084 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
31085 x = gen_rtx_SET (VOIDmode, target, x);
31086 emit_insn (x);
31087
31088 return true;
31089 }
31090
31091 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31092 in terms of the variable form of vpermilps.
31093
31094 Note that we will have already failed the immediate input vpermilps,
31095 which requires that the high and low part shuffle be identical; the
31096 variable form doesn't require that. */
31097
31098 static bool
31099 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
31100 {
31101 rtx rperm[8], vperm;
31102 unsigned i;
31103
31104 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
31105 return false;
31106
31107 /* We can only permute within the 128-bit lane. */
31108 for (i = 0; i < 8; ++i)
31109 {
31110 unsigned e = d->perm[i];
31111 if (i < 4 ? e >= 4 : e < 4)
31112 return false;
31113 }
31114
31115 if (d->testing_p)
31116 return true;
31117
31118 for (i = 0; i < 8; ++i)
31119 {
31120 unsigned e = d->perm[i];
31121
31122 /* Within each 128-bit lane, the elements of op0 are numbered
31123 from 0 and the elements of op1 are numbered from 4. */
31124 if (e >= 8 + 4)
31125 e -= 8;
31126 else if (e >= 4)
31127 e -= 4;
31128
31129 rperm[i] = GEN_INT (e);
31130 }
31131
31132 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
31133 vperm = force_reg (V8SImode, vperm);
31134 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
31135
31136 return true;
31137 }
31138
31139 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31140 in terms of pshufb or vpperm. */
31141
31142 static bool
31143 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
31144 {
31145 unsigned i, nelt, eltsz;
31146 rtx rperm[16], vperm, target, op0, op1;
31147
31148 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
31149 return false;
31150 if (GET_MODE_SIZE (d->vmode) != 16)
31151 return false;
31152
31153 if (d->testing_p)
31154 return true;
31155
31156 nelt = d->nelt;
31157 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31158
31159 for (i = 0; i < nelt; ++i)
31160 {
31161 unsigned j, e = d->perm[i];
31162 for (j = 0; j < eltsz; ++j)
31163 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
31164 }
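
/* For example, a V4SImode request with d->perm = { 2, 1, 0, 3 } expands
   each dword index e into byte indices e*4 .. e*4+3, giving the pshufb
   control vector { 8,9,10,11, 4,5,6,7, 0,1,2,3, 12,13,14,15 }.  */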
31165
31166 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31167 vperm = force_reg (V16QImode, vperm);
31168
31169 target = gen_lowpart (V16QImode, d->target);
31170 op0 = gen_lowpart (V16QImode, d->op0);
31171 if (d->op0 == d->op1)
31172 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
31173 else
31174 {
31175 op1 = gen_lowpart (V16QImode, d->op1);
31176 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
31177 }
31178
31179 return true;
31180 }
31181
31182 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
31183 in a single instruction. */
31184
31185 static bool
31186 expand_vec_perm_1 (struct expand_vec_perm_d *d)
31187 {
31188 unsigned i, nelt = d->nelt;
31189 unsigned char perm2[MAX_VECT_LEN];
31190
31191 /* Check plain VEC_SELECT first, because AVX has instructions that could
31192 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
31193 input where SEL+CONCAT may not. */
31194 if (d->op0 == d->op1)
31195 {
31196 int mask = nelt - 1;
31197
31198 for (i = 0; i < nelt; i++)
31199 perm2[i] = d->perm[i] & mask;
31200
31201 if (expand_vselect (d->target, d->op0, perm2, nelt))
31202 return true;
31203
31204 /* There are plenty of patterns in sse.md that are written for
31205 SEL+CONCAT and are not replicated for a single op. Perhaps
31206 that should be changed, to avoid the nastiness here. */
31207
31208 /* Recognize interleave style patterns, which means incrementing
31209 every other permutation operand. */
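/* For example, for V4SImode, d->perm = { 0, 0, 1, 1 } becomes
   perm2 = { 0, 4, 1, 5 }, which on the op0/op0 concatenation below is
   exactly the interleave-low (punpckldq) pattern.  */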
31210 for (i = 0; i < nelt; i += 2)
31211 {
31212 perm2[i] = d->perm[i] & mask;
31213 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
31214 }
31215 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31216 return true;
31217
31218 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
31219 if (nelt >= 4)
31220 {
31221 for (i = 0; i < nelt; i += 4)
31222 {
31223 perm2[i + 0] = d->perm[i + 0] & mask;
31224 perm2[i + 1] = d->perm[i + 1] & mask;
31225 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
31226 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
31227 }
31228
31229 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31230 return true;
31231 }
31232 }
31233
31234 /* Finally, try the fully general two operand permute. */
31235 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
31236 return true;
31237
31238 /* Recognize interleave style patterns with reversed operands. */
31239 if (d->op0 != d->op1)
31240 {
31241 for (i = 0; i < nelt; ++i)
31242 {
31243 unsigned e = d->perm[i];
31244 if (e >= nelt)
31245 e -= nelt;
31246 else
31247 e += nelt;
31248 perm2[i] = e;
31249 }
31250
31251 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
31252 return true;
31253 }
31254
31255 /* Try the SSE4.1 blend variable merge instructions. */
31256 if (expand_vec_perm_blend (d))
31257 return true;
31258
31259 /* Try one of the AVX vpermil variable permutations. */
31260 if (expand_vec_perm_vpermil (d))
31261 return true;
31262
31263 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
31264 if (expand_vec_perm_pshufb (d))
31265 return true;
31266
31267 return false;
31268 }
31269
31270 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31271 in terms of a pair of pshuflw + pshufhw instructions. */
31272
31273 static bool
31274 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
31275 {
31276 unsigned char perm2[MAX_VECT_LEN];
31277 unsigned i;
31278 bool ok;
31279
31280 if (d->vmode != V8HImode || d->op0 != d->op1)
31281 return false;
31282
31283 /* The two permutations only operate in 64-bit lanes. */
31284 for (i = 0; i < 4; ++i)
31285 if (d->perm[i] >= 4)
31286 return false;
31287 for (i = 4; i < 8; ++i)
31288 if (d->perm[i] < 4)
31289 return false;
31290
31291 if (d->testing_p)
31292 return true;
31293
31294 /* Emit the pshuflw. */
31295 memcpy (perm2, d->perm, 4);
31296 for (i = 4; i < 8; ++i)
31297 perm2[i] = i;
31298 ok = expand_vselect (d->target, d->op0, perm2, 8);
31299 gcc_assert (ok);
31300
31301 /* Emit the pshufhw. */
31302 memcpy (perm2 + 4, d->perm + 4, 4);
31303 for (i = 0; i < 4; ++i)
31304 perm2[i] = i;
31305 ok = expand_vselect (d->target, d->target, perm2, 8);
31306 gcc_assert (ok);
31307
31308 return true;
31309 }
31310
31311 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31312 the permutation using the SSSE3 palignr instruction. This succeeds
31313 when all of the elements in PERM fit within one vector and we merely
31314 need to shift them down so that a single vector permutation has a
31315 chance to succeed. */
31316
31317 static bool
31318 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
31319 {
31320 unsigned i, nelt = d->nelt;
31321 unsigned min, max;
31322 bool in_order, ok;
31323 rtx shift;
31324
31325 /* Even with AVX, palignr only operates on 128-bit vectors. */
31326 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31327 return false;
31328
31329 min = nelt, max = 0;
31330 for (i = 0; i < nelt; ++i)
31331 {
31332 unsigned e = d->perm[i];
31333 if (e < min)
31334 min = e;
31335 if (e > max)
31336 max = e;
31337 }
31338 if (min == 0 || max - min >= nelt)
31339 return false;
31340
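/* For example, for V8HImode with d->perm = { 3, 4, 5, 6, 7, 8, 9, 10 }
   we get min = 3 and max - min = 7 < nelt.  The palignr below shifts the
   op1:op0 pair down by 3 elements, after which the residual permutation
   is the identity (the in_order case below) and nothing more is needed.  */
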
31341 /* Given that we have SSSE3, we know we'll be able to implement the
31342 single operand permutation after the palignr with pshufb. */
31343 if (d->testing_p)
31344 return true;
31345
31346 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
31347 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
31348 gen_lowpart (TImode, d->op1),
31349 gen_lowpart (TImode, d->op0), shift));
31350
31351 d->op0 = d->op1 = d->target;
31352
31353 in_order = true;
31354 for (i = 0; i < nelt; ++i)
31355 {
31356 unsigned e = d->perm[i] - min;
31357 if (e != i)
31358 in_order = false;
31359 d->perm[i] = e;
31360 }
31361
31362 /* Test for the degenerate case where the alignment by itself
31363 produces the desired permutation. */
31364 if (in_order)
31365 return true;
31366
31367 ok = expand_vec_perm_1 (d);
31368 gcc_assert (ok);
31369
31370 return ok;
31371 }
31372
31373 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31374 a two vector permutation into a single vector permutation by using
31375 an interleave operation to merge the vectors. */
31376
31377 static bool
31378 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
31379 {
31380 struct expand_vec_perm_d dremap, dfinal;
31381 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
31382 unsigned contents, h1, h2, h3, h4;
31383 unsigned char remap[2 * MAX_VECT_LEN];
31384 rtx seq;
31385 bool ok;
31386
31387 if (d->op0 == d->op1)
31388 return false;
31389
31390 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
31391 lanes. We can use similar techniques with the vperm2f128 instruction,
31392 but it requires slightly different logic. */
31393 if (GET_MODE_SIZE (d->vmode) != 16)
31394 return false;
31395
31396 /* Examine from whence the elements come. */
31397 contents = 0;
31398 for (i = 0; i < nelt; ++i)
31399 contents |= 1u << d->perm[i];
31400
31401 /* Split the two input vectors into 4 halves. */
31402 h1 = (1u << nelt2) - 1;
31403 h2 = h1 << nelt2;
31404 h3 = h2 << nelt2;
31405 h4 = h3 << nelt2;
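
/* For example, for V4SFmode, nelt2 = 2, so h1 = 0x3, h2 = 0xc, h3 = 0x30
   and h4 = 0xc0.  A permutation such as { 0, 5, 1, 4 } has
   contents == 0x33 == (h1 | h3), so the interleave-low remapping below
   applies.  */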
31406
31407 memset (remap, 0xff, sizeof (remap));
31408 dremap = *d;
31409
31410 /* If the elements are all from the low halves, use interleave low; similarly,
31411 use interleave high if they are all from the high halves. If the elements are
31412 from mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
31413 if ((contents & (h1 | h3)) == contents)
31414 {
31415 for (i = 0; i < nelt2; ++i)
31416 {
31417 remap[i] = i * 2;
31418 remap[i + nelt] = i * 2 + 1;
31419 dremap.perm[i * 2] = i;
31420 dremap.perm[i * 2 + 1] = i + nelt;
31421 }
31422 }
31423 else if ((contents & (h2 | h4)) == contents)
31424 {
31425 for (i = 0; i < nelt2; ++i)
31426 {
31427 remap[i + nelt2] = i * 2;
31428 remap[i + nelt + nelt2] = i * 2 + 1;
31429 dremap.perm[i * 2] = i + nelt2;
31430 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
31431 }
31432 }
31433 else if ((contents & (h1 | h4)) == contents)
31434 {
31435 for (i = 0; i < nelt2; ++i)
31436 {
31437 remap[i] = i;
31438 remap[i + nelt + nelt2] = i + nelt2;
31439 dremap.perm[i] = i;
31440 dremap.perm[i + nelt2] = i + nelt + nelt2;
31441 }
31442 if (nelt != 4)
31443 {
31444 dremap.vmode = V2DImode;
31445 dremap.nelt = 2;
31446 dremap.perm[0] = 0;
31447 dremap.perm[1] = 3;
31448 }
31449 }
31450 else if ((contents & (h2 | h3)) == contents)
31451 {
31452 for (i = 0; i < nelt2; ++i)
31453 {
31454 remap[i + nelt2] = i;
31455 remap[i + nelt] = i + nelt2;
31456 dremap.perm[i] = i + nelt2;
31457 dremap.perm[i + nelt2] = i + nelt;
31458 }
31459 if (nelt != 4)
31460 {
31461 dremap.vmode = V2DImode;
31462 dremap.nelt = 2;
31463 dremap.perm[0] = 1;
31464 dremap.perm[1] = 2;
31465 }
31466 }
31467 else
31468 return false;
31469
31470 /* Use the remapping array set up above to move the elements from their
31471 swizzled locations into their final destinations. */
31472 dfinal = *d;
31473 for (i = 0; i < nelt; ++i)
31474 {
31475 unsigned e = remap[d->perm[i]];
31476 gcc_assert (e < nelt);
31477 dfinal.perm[i] = e;
31478 }
31479 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
31480 dfinal.op1 = dfinal.op0;
31481 dremap.target = dfinal.op0;
31482
31483 /* Test if the final remap can be done with a single insn. For V4SFmode or
31484 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
31485 start_sequence ();
31486 ok = expand_vec_perm_1 (&dfinal);
31487 seq = get_insns ();
31488 end_sequence ();
31489
31490 if (!ok)
31491 return false;
31492
31493 if (dremap.vmode != dfinal.vmode)
31494 {
31495 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
31496 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
31497 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
31498 }
31499
31500 ok = expand_vec_perm_1 (&dremap);
31501 gcc_assert (ok);
31502
31503 emit_insn (seq);
31504 return true;
31505 }
31506
31507 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
31508 permutation with two pshufb insns and an ior. We should have already
31509 failed all two instruction sequences. */
31510
31511 static bool
31512 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
31513 {
31514 rtx rperm[2][16], vperm, l, h, op, m128;
31515 unsigned int i, nelt, eltsz;
31516
31517 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31518 return false;
31519 gcc_assert (d->op0 != d->op1);
31520
31521 nelt = d->nelt;
31522 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31523
31524 /* Generate two permutation masks. If the required element is within
31525 the given vector it is shuffled into the proper lane. If the required
31526 element is in the other vector, force a zero into the lane by setting
31527 bit 7 in the permutation mask. */
31528 m128 = GEN_INT (-128);
31529 for (i = 0; i < nelt; ++i)
31530 {
31531 unsigned j, e = d->perm[i];
31532 unsigned which = (e >= nelt);
31533 if (e >= nelt)
31534 e -= nelt;
31535
31536 for (j = 0; j < eltsz; ++j)
31537 {
31538 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
31539 rperm[1-which][i*eltsz + j] = m128;
31540 }
31541 }
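
/* For example, for V4SImode with d->perm = { 0, 5, 2, 7 }, the op0 mask
   selects source bytes 0-3 and 8-11 into the corresponding result lanes
   and holds -128 (forcing zero) elsewhere, while the op1 mask selects
   bytes 4-7 and 12-15 into the remaining lanes; or'ing the two pshufb
   results below merges them.  */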
31542
31543 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
31544 vperm = force_reg (V16QImode, vperm);
31545
31546 l = gen_reg_rtx (V16QImode);
31547 op = gen_lowpart (V16QImode, d->op0);
31548 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
31549
31550 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
31551 vperm = force_reg (V16QImode, vperm);
31552
31553 h = gen_reg_rtx (V16QImode);
31554 op = gen_lowpart (V16QImode, d->op1);
31555 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
31556
31557 op = gen_lowpart (V16QImode, d->target);
31558 emit_insn (gen_iorv16qi3 (op, l, h));
31559
31560 return true;
31561 }
31562
31563 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
31564 and extract-odd permutations. */
31565
31566 static bool
31567 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
31568 {
31569 rtx t1, t2, t3, t4;
31570
31571 switch (d->vmode)
31572 {
31573 case V4DFmode:
31574 t1 = gen_reg_rtx (V4DFmode);
31575 t2 = gen_reg_rtx (V4DFmode);
31576
31577 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
31578 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
31579 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
31580
31581 /* Now an unpck[lh]pd will produce the result required. */
31582 if (odd)
31583 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
31584 else
31585 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
31586 emit_insn (t3);
31587 break;
31588
31589 case V8SFmode:
31590 {
31591 static const unsigned char perm1[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
31592 static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
31593 static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
31594
31595 t1 = gen_reg_rtx (V8SFmode);
31596 t2 = gen_reg_rtx (V8SFmode);
31597 t3 = gen_reg_rtx (V8SFmode);
31598 t4 = gen_reg_rtx (V8SFmode);
31599
31600 /* Shuffle within the 128-bit lanes to produce:
31601 { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */
31602 expand_vselect (t1, d->op0, perm1, 8);
31603 expand_vselect (t2, d->op1, perm1, 8);
31604
31605 /* Shuffle the lanes around to produce:
31606 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
31607 emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
31608 emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
31609
31610 /* Now a vpermil2p will produce the result required. */
31611 /* ??? The vpermil2p requires a vector constant. Another option
31612 is a unpck[lh]ps to merge the two vectors to produce
31613 { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }. Then use another
31614 vpermilps to get the elements into the final order. */
31615 d->op0 = t3;
31616 d->op1 = t4;
31617 memcpy (d->perm, odd ? permo : perme, 8);
31618 expand_vec_perm_vpermil (d);
31619 }
31620 break;
31621
31622 case V2DFmode:
31623 case V4SFmode:
31624 case V2DImode:
31625 case V4SImode:
31626 /* These are always directly implementable by expand_vec_perm_1. */
31627 gcc_unreachable ();
31628
31629 case V8HImode:
31630 if (TARGET_SSSE3)
31631 return expand_vec_perm_pshufb2 (d);
31632 else
31633 {
31634 /* We need 2*log2(N)-1 operations to achieve odd/even
31635 with interleave. */
31636 t1 = gen_reg_rtx (V8HImode);
31637 t2 = gen_reg_rtx (V8HImode);
31638 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
31639 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
31640 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
31641 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
31642 if (odd)
31643 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
31644 else
31645 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
31646 emit_insn (t3);
31647 }
31648 break;
31649
31650 case V16QImode:
31651 if (TARGET_SSSE3)
31652 return expand_vec_perm_pshufb2 (d);
31653 else
31654 {
31655 t1 = gen_reg_rtx (V16QImode);
31656 t2 = gen_reg_rtx (V16QImode);
31657 t3 = gen_reg_rtx (V16QImode);
31658 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
31659 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
31660 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
31661 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
31662 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
31663 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
31664 if (odd)
31665 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
31666 else
31667 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
31668 emit_insn (t3);
31669 }
31670 break;
31671
31672 default:
31673 gcc_unreachable ();
31674 }
31675
31676 return true;
31677 }
31678
31679 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31680 extract-even and extract-odd permutations. */
31681
31682 static bool
31683 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
31684 {
31685 unsigned i, odd, nelt = d->nelt;
31686
31687 odd = d->perm[0];
31688 if (odd != 0 && odd != 1)
31689 return false;
31690
31691 for (i = 1; i < nelt; ++i)
31692 if (d->perm[i] != 2 * i + odd)
31693 return false;
31694
31695 return expand_vec_perm_even_odd_1 (d, odd);
31696 }
31697
31698 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
31699 permutations. We assume that expand_vec_perm_1 has already failed. */
31700
31701 static bool
31702 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
31703 {
31704 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
31705 enum machine_mode vmode = d->vmode;
31706 unsigned char perm2[4];
31707 rtx op0 = d->op0;
31708 bool ok;
31709
31710 switch (vmode)
31711 {
31712 case V4DFmode:
31713 case V8SFmode:
31714 /* These are special-cased in sse.md so that we can optionally
31715 use the vbroadcast instruction. They expand to two insns
31716 if the input happens to be in a register. */
31717 gcc_unreachable ();
31718
31719 case V2DFmode:
31720 case V2DImode:
31721 case V4SFmode:
31722 case V4SImode:
31723 /* These are always implementable using standard shuffle patterns. */
31724 gcc_unreachable ();
31725
31726 case V8HImode:
31727 case V16QImode:
31728 /* These can be implemented via interleave. We save one insn by
31729 stopping once we have promoted to V4SImode and then use pshufd. */
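/* For example, broadcasting element 5 of a V8HImode vector takes one
   interleave-high step (elt = 5 >= nelt2 = 4), yielding
   { 4 4 5 5 6 6 7 7 }; viewed as V4SImode the wanted pair now sits in
   dword 1, so the final pshufd broadcasts dword 1.  */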
31730 do
31731 {
31732 optab otab = vec_interleave_low_optab;
31733
31734 if (elt >= nelt2)
31735 {
31736 otab = vec_interleave_high_optab;
31737 elt -= nelt2;
31738 }
31739 nelt2 /= 2;
31740
31741 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
31742 vmode = get_mode_wider_vector (vmode);
31743 op0 = gen_lowpart (vmode, op0);
31744 }
31745 while (vmode != V4SImode);
31746
31747 memset (perm2, elt, 4);
31748 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
31749 gcc_assert (ok);
31750 return true;
31751
31752 default:
31753 gcc_unreachable ();
31754 }
31755 }
31756
31757 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31758 broadcast permutations. */
31759
31760 static bool
31761 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
31762 {
31763 unsigned i, elt, nelt = d->nelt;
31764
31765 if (d->op0 != d->op1)
31766 return false;
31767
31768 elt = d->perm[0];
31769 for (i = 1; i < nelt; ++i)
31770 if (d->perm[i] != elt)
31771 return false;
31772
31773 return expand_vec_perm_broadcast_1 (d);
31774 }
31775
31776 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
31777 With all of the interface bits taken care of, perform the expansion
31778 in D and return true on success. */
31779
31780 static bool
31781 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
31782 {
31783 /* Try a single instruction expansion. */
31784 if (expand_vec_perm_1 (d))
31785 return true;
31786
31787 /* Try sequences of two instructions. */
31788
31789 if (expand_vec_perm_pshuflw_pshufhw (d))
31790 return true;
31791
31792 if (expand_vec_perm_palignr (d))
31793 return true;
31794
31795 if (expand_vec_perm_interleave2 (d))
31796 return true;
31797
31798 if (expand_vec_perm_broadcast (d))
31799 return true;
31800
31801 /* Try sequences of three instructions. */
31802
31803 if (expand_vec_perm_pshufb2 (d))
31804 return true;
31805
31806 /* ??? Look for narrow permutations whose element orderings would
31807 allow the promotion to a wider mode. */
31808
31809 /* ??? Look for sequences of interleave or a wider permute that place
31810 the data into the correct lanes for a half-vector shuffle like
31811 pshuf[lh]w or vpermilps. */
31812
31813 /* ??? Look for sequences of interleave that produce the desired results.
31814 The combinatorics of punpck[lh] get pretty ugly... */
31815
31816 if (expand_vec_perm_even_odd (d))
31817 return true;
31818
31819 return false;
31820 }
31821
31822 /* Extract the values from the vector CST into the permutation array in D.
31823 Return 0 on error, 1 if all values from the permutation come from the
31824 first vector, 2 if all values from the second vector, and 3 otherwise. */
31825
31826 static int
31827 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
31828 {
31829 tree list = TREE_VECTOR_CST_ELTS (cst);
31830 unsigned i, nelt = d->nelt;
31831 int ret = 0;
31832
31833 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
31834 {
31835 unsigned HOST_WIDE_INT e;
31836
31837 if (!host_integerp (TREE_VALUE (list), 1))
31838 return 0;
31839 e = tree_low_cst (TREE_VALUE (list), 1);
31840 if (e >= 2 * nelt)
31841 return 0;
31842
31843 ret |= (e < nelt ? 1 : 2);
31844 d->perm[i] = e;
31845 }
31846 gcc_assert (list == NULL);
31847
31848 /* If all elements are from the second vector, fold them to the first vector. */
31849 if (ret == 2)
31850 for (i = 0; i < nelt; ++i)
31851 d->perm[i] -= nelt;
31852
31853 return ret;
31854 }
31855
31856 static rtx
31857 ix86_expand_vec_perm_builtin (tree exp)
31858 {
31859 struct expand_vec_perm_d d;
31860 tree arg0, arg1, arg2;
31861
31862 arg0 = CALL_EXPR_ARG (exp, 0);
31863 arg1 = CALL_EXPR_ARG (exp, 1);
31864 arg2 = CALL_EXPR_ARG (exp, 2);
31865
31866 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
31867 d.nelt = GET_MODE_NUNITS (d.vmode);
31868 d.testing_p = false;
31869 gcc_assert (VECTOR_MODE_P (d.vmode));
31870
31871 if (TREE_CODE (arg2) != VECTOR_CST)
31872 {
31873 error_at (EXPR_LOCATION (exp),
31874 "vector permutation requires vector constant");
31875 goto exit_error;
31876 }
31877
31878 switch (extract_vec_perm_cst (&d, arg2))
31879 {
31880 default:
31881 gcc_unreachable ();
31882
31883 case 0:
31884 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
31885 goto exit_error;
31886
31887 case 3:
31888 if (!operand_equal_p (arg0, arg1, 0))
31889 {
31890 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31891 d.op0 = force_reg (d.vmode, d.op0);
31892 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31893 d.op1 = force_reg (d.vmode, d.op1);
31894 break;
31895 }
31896
31897 /* The elements of PERM do not suggest that only the first operand
31898 is used, but both operands are identical. Allow easier matching
31899 of the permutation by folding the permutation into the single
31900 input vector. */
31901 {
31902 unsigned i, nelt = d.nelt;
31903 for (i = 0; i < nelt; ++i)
31904 if (d.perm[i] >= nelt)
31905 d.perm[i] -= nelt;
31906 }
31907 /* FALLTHRU */
31908
31909 case 1:
31910 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31911 d.op0 = force_reg (d.vmode, d.op0);
31912 d.op1 = d.op0;
31913 break;
31914
31915 case 2:
31916 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31917 d.op0 = force_reg (d.vmode, d.op0);
31918 d.op1 = d.op0;
31919 break;
31920 }
31921
31922 d.target = gen_reg_rtx (d.vmode);
31923 if (ix86_expand_vec_perm_builtin_1 (&d))
31924 return d.target;
31925
31926 /* For compiler generated permutations, we should never get here, because
31927 the compiler should also be checking the ok hook. But since this is a
31928 builtin that the user has access to, don't abort. */
31929 switch (d.nelt)
31930 {
31931 case 2:
31932 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
31933 break;
31934 case 4:
31935 sorry ("vector permutation (%d %d %d %d)",
31936 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
31937 break;
31938 case 8:
31939 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
31940 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31941 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
31942 break;
31943 case 16:
31944 sorry ("vector permutation "
31945 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
31946 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31947 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
31948 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
31949 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
31950 break;
31951 default:
31952 gcc_unreachable ();
31953 }
31954 exit_error:
31955 return CONST0_RTX (d.vmode);
31956 }
31957
31958 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
31959
31960 static bool
31961 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
31962 {
31963 struct expand_vec_perm_d d;
31964 int vec_mask;
31965 bool ret, one_vec;
31966
31967 d.vmode = TYPE_MODE (vec_type);
31968 d.nelt = GET_MODE_NUNITS (d.vmode);
31969 d.testing_p = true;
31970
31971 /* Given sufficient ISA support we can just return true here
31972 for selected vector modes. */
31973 if (GET_MODE_SIZE (d.vmode) == 16)
31974 {
31975 /* All implementable with a single vpperm insn. */
31976 if (TARGET_XOP)
31977 return true;
31978 /* All implementable with 2 pshufb + 1 ior. */
31979 if (TARGET_SSSE3)
31980 return true;
31981 /* All implementable with shufpd or unpck[lh]pd. */
31982 if (d.nelt == 2)
31983 return true;
31984 }
31985
31986 vec_mask = extract_vec_perm_cst (&d, mask);
31987
31988 /* This hook cannot be called in response to something that the
31989 user does (unlike the builtin expander), so we shouldn't ever see
31990 an error generated from the extract. */
31991 gcc_assert (vec_mask > 0 && vec_mask <= 3);
31992 one_vec = (vec_mask != 3);
31993
31994 /* Implementable with shufps or pshufd. */
31995 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
31996 return true;
31997
31998 /* Otherwise we have to go through the motions and see if we can
31999 figure out how to generate the requested permutation. */
32000 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32001 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32002 if (!one_vec)
32003 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32004
32005 start_sequence ();
32006 ret = ix86_expand_vec_perm_builtin_1 (&d);
32007 end_sequence ();
32008
32009 return ret;
32010 }
32011
32012 void
32013 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
32014 {
32015 struct expand_vec_perm_d d;
32016 unsigned i, nelt;
32017
32018 d.target = targ;
32019 d.op0 = op0;
32020 d.op1 = op1;
32021 d.vmode = GET_MODE (targ);
32022 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
32023 d.testing_p = false;
32024
32025 for (i = 0; i < nelt; ++i)
32026 d.perm[i] = i * 2 + odd;
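
/* For example, for a V4SImode target with ODD = 1 this builds the
   permutation { 1, 3, 5, 7 }, selecting the odd elements of the
   op0:op1 pair.  */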
32027
32028 /* We'll either be able to implement the permutation directly... */
32029 if (expand_vec_perm_1 (&d))
32030 return;
32031
32032 /* ... or we use the special-case patterns. */
32033 expand_vec_perm_even_odd_1 (&d, odd);
32034 }
32035 \f
32036 /* This function returns the calling-ABI-specific va_list type node
32037 for FNDECL. */
32038
32039 static tree
32040 ix86_fn_abi_va_list (tree fndecl)
32041 {
32042 if (!TARGET_64BIT)
32043 return va_list_type_node;
32044 gcc_assert (fndecl != NULL_TREE);
32045
32046 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
32047 return ms_va_list_type_node;
32048 else
32049 return sysv_va_list_type_node;
32050 }
32051
32052 /* Returns the canonical va_list type specified by TYPE. If there
32053 is no valid TYPE provided, it returns NULL_TREE. */
32054
32055 static tree
32056 ix86_canonical_va_list_type (tree type)
32057 {
32058 tree wtype, htype;
32059
32060 /* Resolve references and pointers to va_list type. */
32061 if (TREE_CODE (type) == MEM_REF)
32062 type = TREE_TYPE (type);
32063 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
32064 type = TREE_TYPE (type);
32065 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
32066 type = TREE_TYPE (type);
32067
32068 if (TARGET_64BIT)
32069 {
32070 wtype = va_list_type_node;
32071 gcc_assert (wtype != NULL_TREE);
32072 htype = type;
32073 if (TREE_CODE (wtype) == ARRAY_TYPE)
32074 {
32075 /* If va_list is an array type, the argument may have decayed
32076 to a pointer type, e.g. by being passed to another function.
32077 In that case, unwrap both types so that we can compare the
32078 underlying records. */
32079 if (TREE_CODE (htype) == ARRAY_TYPE
32080 || POINTER_TYPE_P (htype))
32081 {
32082 wtype = TREE_TYPE (wtype);
32083 htype = TREE_TYPE (htype);
32084 }
32085 }
32086 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32087 return va_list_type_node;
32088 wtype = sysv_va_list_type_node;
32089 gcc_assert (wtype != NULL_TREE);
32090 htype = type;
32091 if (TREE_CODE (wtype) == ARRAY_TYPE)
32092 {
32093 /* If va_list is an array type, the argument may have decayed
32094 to a pointer type, e.g. by being passed to another function.
32095 In that case, unwrap both types so that we can compare the
32096 underlying records. */
32097 if (TREE_CODE (htype) == ARRAY_TYPE
32098 || POINTER_TYPE_P (htype))
32099 {
32100 wtype = TREE_TYPE (wtype);
32101 htype = TREE_TYPE (htype);
32102 }
32103 }
32104 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32105 return sysv_va_list_type_node;
32106 wtype = ms_va_list_type_node;
32107 gcc_assert (wtype != NULL_TREE);
32108 htype = type;
32109 if (TREE_CODE (wtype) == ARRAY_TYPE)
32110 {
32111 /* If va_list is an array type, the argument may have decayed
32112 to a pointer type, e.g. by being passed to another function.
32113 In that case, unwrap both types so that we can compare the
32114 underlying records. */
32115 if (TREE_CODE (htype) == ARRAY_TYPE
32116 || POINTER_TYPE_P (htype))
32117 {
32118 wtype = TREE_TYPE (wtype);
32119 htype = TREE_TYPE (htype);
32120 }
32121 }
32122 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32123 return ms_va_list_type_node;
32124 return NULL_TREE;
32125 }
32126 return std_canonical_va_list_type (type);
32127 }
32128
32129 /* Iterate through the target-specific builtin types for va_list.
32130 IDX denotes the iterator, *PTREE is set to the result type of
32131 the va_list builtin, and *PNAME to its internal name.
32132 Returns zero if there is no element for this index, otherwise
32133 IDX should be increased upon the next call.
32134 Note, do not iterate a base builtin's name like __builtin_va_list.
32135 Used from c_common_nodes_and_builtins. */
32136
32137 static int
32138 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
32139 {
32140 if (TARGET_64BIT)
32141 {
32142 switch (idx)
32143 {
32144 default:
32145 break;
32146
32147 case 0:
32148 *ptree = ms_va_list_type_node;
32149 *pname = "__builtin_ms_va_list";
32150 return 1;
32151
32152 case 1:
32153 *ptree = sysv_va_list_type_node;
32154 *pname = "__builtin_sysv_va_list";
32155 return 1;
32156 }
32157 }
32158
32159 return 0;
32160 }
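/* With the enumeration above, 64-bit targets expose __builtin_ms_va_list
   and __builtin_sysv_va_list to the C-family front ends in addition to
   the base __builtin_va_list, which is deliberately not enumerated
   here.  */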
32161
32162 #undef TARGET_SCHED_DISPATCH
32163 #define TARGET_SCHED_DISPATCH has_dispatch
32164 #undef TARGET_SCHED_DISPATCH_DO
32165 #define TARGET_SCHED_DISPATCH_DO do_dispatch
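/* The two hooks above are implemented by has_dispatch and do_dispatch at
   the end of this dispatch-scheduling section; has_dispatch gates the
   whole machinery on the tuning target and the flag_dispatch_scheduler
   option.  */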
32166
32167 /* The size of the dispatch window is the total number of bytes of
32168 object code allowed in a window. */
32169 #define DISPATCH_WINDOW_SIZE 16
32170
32171 /* Number of dispatch windows considered for scheduling. */
32172 #define MAX_DISPATCH_WINDOWS 3
32173
32174 /* Maximum number of instructions in a window. */
32175 #define MAX_INSN 4
32176
32177 /* Maximum number of immediate operands in a window. */
32178 #define MAX_IMM 4
32179
32180 /* Maximum number of immediate bits allowed in a window. */
32181 #define MAX_IMM_SIZE 128
32182
32183 /* Maximum number of 32 bit immediates allowed in a window. */
32184 #define MAX_IMM_32 4
32185
32186 /* Maximum number of 64 bit immediates allowed in a window. */
32187 #define MAX_IMM_64 2
32188
32189 /* Maximum total of loads or prefetches allowed in a window. */
32190 #define MAX_LOAD 2
32191
32192 /* Maximum total of stores allowed in a window. */
32193 #define MAX_STORE 1
32194
32195 #undef BIG
32196 #define BIG 100
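/* A rough summary of the limits above: a window holds at most MAX_INSN
   instructions and MAX_IMM immediate operands, the immediates may occupy
   at most MAX_IMM_SIZE bits (with at most MAX_IMM_32 32-bit and
   MAX_IMM_64 64-bit immediates), and at most MAX_LOAD loads/prefetches
   and MAX_STORE stores are allowed.  BIG stands in below for a count
   that is effectively unlimited.  */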
32197
32198
32199 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
32200 enum dispatch_group {
32201 disp_no_group = 0,
32202 disp_load,
32203 disp_store,
32204 disp_load_store,
32205 disp_prefetch,
32206 disp_imm,
32207 disp_imm_32,
32208 disp_imm_64,
32209 disp_branch,
32210 disp_cmp,
32211 disp_jcc,
32212 disp_last
32213 };
32214
32215 /* Number of allowable groups in a dispatch window. It is an array
32216 indexed by dispatch_group enum. 100 is used as a big number,
32217 because the number of these kinds of operations does not have any
32218 effect on the dispatch window, but we need them for other reasons in
32219 the table. */
32220 static unsigned int num_allowable_groups[disp_last] = {
32221 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
32222 };
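/* Reading the table above in enum dispatch_group order: disp_no_group 0,
   disp_load 2, disp_store 1, disp_load_store 1, disp_prefetch 2,
   disp_imm 4, disp_imm_32 4, disp_imm_64 2, disp_branch 1, and BIG for
   both disp_cmp and disp_jcc.  */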
32223
32224 char group_name[disp_last + 1][16] = {
32225 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
32226 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
32227 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
32228 };
32229
32230 /* Instruction path. */
32231 enum insn_path {
32232 no_path = 0,
32233 path_single, /* Single micro op. */
32234 path_double, /* Double micro op. */
32235 path_multi, /* Instructions with more than 2 micro ops. */
32236 last_path
32237 };
32238
32239 /* sched_insn_info defines a window to the instructions scheduled in
32240 the basic block. It contains a pointer to the insn_info table and
32241 the instruction scheduled.
32242
32243 Windows are allocated for each basic block and are linked
32244 together. */
32245 typedef struct sched_insn_info_s {
32246 rtx insn;
32247 enum dispatch_group group;
32248 enum insn_path path;
32249 int byte_len;
32250 int imm_bytes;
32251 } sched_insn_info;
32252
32253 /* Linked list of dispatch windows. This is a two-way list of
32254 dispatch windows of a basic block. It contains information about
32255 the number of uops in the window and the total number of
32256 instructions and of bytes in the object code for this dispatch
32257 window. */
32258 typedef struct dispatch_windows_s {
32259 int num_insn; /* Number of insns in the window. */
32260 int num_uops; /* Number of uops in the window. */
32261 int window_size; /* Number of bytes in the window. */
32262 int window_num; /* Window number, either 0 or 1. */
32263 int num_imm; /* Number of immediates in the window. */
32264 int num_imm_32; /* Number of 32 bit immediates in the window. */
32265 int num_imm_64; /* Number of 64 bit immediates in the window. */
32266 int imm_size; /* Total size in bytes of immediates in the window. */
32267 int num_loads; /* Total memory loads in the window. */
32268 int num_stores; /* Total memory stores in the window. */
32269 int violation; /* Violation exists in window. */
32270 sched_insn_info *window; /* Pointer to the window. */
32271 struct dispatch_windows_s *next;
32272 struct dispatch_windows_s *prev;
32273 } dispatch_windows;
32274
32275 /* Immediate values used in an insn. */
32276 typedef struct imm_info_s
32277 {
32278 int imm;
32279 int imm32;
32280 int imm64;
32281 } imm_info;
32282
32283 static dispatch_windows *dispatch_window_list;
32284 static dispatch_windows *dispatch_window_list1;
32285
32286 /* Get dispatch group of insn. */
32287
32288 static enum dispatch_group
32289 get_mem_group (rtx insn)
32290 {
32291 enum attr_memory memory;
32292
32293 if (INSN_CODE (insn) < 0)
32294 return disp_no_group;
32295 memory = get_attr_memory (insn);
32296 if (memory == MEMORY_STORE)
32297 return disp_store;
32298
32299 if (memory == MEMORY_LOAD)
32300 return disp_load;
32301
32302 if (memory == MEMORY_BOTH)
32303 return disp_load_store;
32304
32305 return disp_no_group;
32306 }
32307
32308 /* Return true if insn is a compare instruction. */
32309
32310 static bool
32311 is_cmp (rtx insn)
32312 {
32313 enum attr_type type;
32314
32315 type = get_attr_type (insn);
32316 return (type == TYPE_TEST
32317 || type == TYPE_ICMP
32318 || type == TYPE_FCMP
32319 || GET_CODE (PATTERN (insn)) == COMPARE);
32320 }
32321
32322 /* Return true if a dispatch violation was encountered. */
32323
32324 static bool
32325 dispatch_violation (void)
32326 {
32327 if (dispatch_window_list->next)
32328 return dispatch_window_list->next->violation;
32329 return dispatch_window_list->violation;
32330 }
32331
32332 /* Return true if insn is a branch instruction. */
32333
32334 static bool
32335 is_branch (rtx insn)
32336 {
32337 return (CALL_P (insn) || JUMP_P (insn));
32338 }
32339
32340 /* Return true if insn is a prefetch instruction. */
32341
32342 static bool
32343 is_prefetch (rtx insn)
32344 {
32345 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
32346 }
32347
32348 /* This function initializes a dispatch window and the list container holding a
32349 pointer to the window. */
32350
32351 static void
32352 init_window (int window_num)
32353 {
32354 int i;
32355 dispatch_windows *new_list;
32356
32357 if (window_num == 0)
32358 new_list = dispatch_window_list;
32359 else
32360 new_list = dispatch_window_list1;
32361
32362 new_list->num_insn = 0;
32363 new_list->num_uops = 0;
32364 new_list->window_size = 0;
32365 new_list->next = NULL;
32366 new_list->prev = NULL;
32367 new_list->window_num = window_num;
32368 new_list->num_imm = 0;
32369 new_list->num_imm_32 = 0;
32370 new_list->num_imm_64 = 0;
32371 new_list->imm_size = 0;
32372 new_list->num_loads = 0;
32373 new_list->num_stores = 0;
32374 new_list->violation = false;
32375
32376 for (i = 0; i < MAX_INSN; i++)
32377 {
32378 new_list->window[i].insn = NULL;
32379 new_list->window[i].group = disp_no_group;
32380 new_list->window[i].path = no_path;
32381 new_list->window[i].byte_len = 0;
32382 new_list->window[i].imm_bytes = 0;
32383 }
32384 return;
32385 }
32386
32387 /* This function allocates and initializes a dispatch window and the
32388 list container holding a pointer to the window. */
32389
32390 static dispatch_windows *
32391 allocate_window (void)
32392 {
32393 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
32394 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
32395
32396 return new_list;
32397 }
32398
32399 /* This routine initializes the dispatch scheduling information. It
32400 initiates building dispatch scheduler tables and constructs the
32401 first dispatch window. */
32402
32403 static void
32404 init_dispatch_sched (void)
32405 {
32406 /* Allocate a dispatch list and a window. */
32407 dispatch_window_list = allocate_window ();
32408 dispatch_window_list1 = allocate_window ();
32409 init_window (0);
32410 init_window (1);
32411 }
32412
32413 /* This function returns true if a branch is detected. The end of a basic
32414 block does not have to be a branch, but here we assume that only
32415 branches end a window. */
32416
32417 static bool
32418 is_end_basic_block (enum dispatch_group group)
32419 {
32420 return group == disp_branch;
32421 }
32422
32423 /* This function is called when the end of a window processing is reached. */
32424
32425 static void
32426 process_end_window (void)
32427 {
32428 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
32429 if (dispatch_window_list->next)
32430 {
32431 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
32432 gcc_assert (dispatch_window_list->window_size
32433 + dispatch_window_list1->window_size <= 48);
32434 init_window (1);
32435 }
32436 init_window (0);
32437 }
32438
32439 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
32440 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
32441 for 48 bytes of instructions. Note that these windows are not dispatch
32442 windows whose sizes are DISPATCH_WINDOW_SIZE. */
32443
32444 static dispatch_windows *
32445 allocate_next_window (int window_num)
32446 {
32447 if (window_num == 0)
32448 {
32449 if (dispatch_window_list->next)
32450 init_window (1);
32451 init_window (0);
32452 return dispatch_window_list;
32453 }
32454
32455 dispatch_window_list->next = dispatch_window_list1;
32456 dispatch_window_list1->prev = dispatch_window_list;
32457
32458 return dispatch_window_list1;
32459 }
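/* Note that allocate_next_window does not allocate memory; it reuses the
   two statically allocated windows, re-initializing window 0 (and window 1
   if it is already linked) when WINDOW_NUM is 0, and linking window 1
   behind window 0 when WINDOW_NUM is 1.  */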
32460
32461 /* Increment the number of immediate operands of an instruction. */
32462
32463 static int
32464 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
32465 {
32466 if (*in_rtx == 0)
32467 return 0;
32468
32469 switch (GET_CODE (*in_rtx))
32470 {
32471 case CONST:
32472 case SYMBOL_REF:
32473 case CONST_INT:
32474 (imm_values->imm)++;
32475 if (x86_64_immediate_operand (*in_rtx, SImode))
32476 (imm_values->imm32)++;
32477 else
32478 (imm_values->imm64)++;
32479 break;
32480
32481 case CONST_DOUBLE:
32482 (imm_values->imm)++;
32483 (imm_values->imm64)++;
32484 break;
32485
32486 case CODE_LABEL:
32487 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
32488 {
32489 (imm_values->imm)++;
32490 (imm_values->imm32)++;
32491 }
32492 break;
32493
32494 default:
32495 break;
32496 }
32497
32498 return 0;
32499 }
32500
32501 /* Compute number of immediate operands of an instruction. */
32502
32503 static void
32504 find_constant (rtx in_rtx, imm_info *imm_values)
32505 {
32506 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
32507 (rtx_function) find_constant_1, (void *) imm_values);
32508 }
32509
32510 /* Return total size of immediate operands of an instruction along with number
32511 of corresponding immediate operands. It initializes its parameters to zero
32512 before calling FIND_CONSTANT.
32513 INSN is the input instruction. IMM is the total of immediates.
32514 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
32515 bit immediates. */
32516
32517 static int
32518 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
32519 {
32520 imm_info imm_values = {0, 0, 0};
32521
32522 find_constant (insn, &imm_values);
32523 *imm = imm_values.imm;
32524 *imm32 = imm_values.imm32;
32525 *imm64 = imm_values.imm64;
32526 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
32527 }
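/* For example, an insn carrying two 32-bit immediates and one 64-bit
   immediate comes back with *IMM = 3, *IMM32 = 2, *IMM64 = 1 and a
   return value of 2*4 + 1*8 = 16 bytes.  */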
32528
32529 /* This function indicates whether an instruction has any immediate
32530 operands. */
32531
32532 static bool
32533 has_immediate (rtx insn)
32534 {
32535 int num_imm_operand;
32536 int num_imm32_operand;
32537 int num_imm64_operand;
32538
32539 if (insn)
32540 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32541 &num_imm64_operand);
32542 return false;
32543 }
32544
32545 /* Return the single, double or multi path for an instruction. */
32546
32547 static enum insn_path
32548 get_insn_path (rtx insn)
32549 {
32550 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
32551
32552 if ((int)path == 0)
32553 return path_single;
32554
32555 if ((int)path == 1)
32556 return path_double;
32557
32558 return path_multi;
32559 }
32560
32561 /* Return insn dispatch group. */
32562
32563 static enum dispatch_group
32564 get_insn_group (rtx insn)
32565 {
32566 enum dispatch_group group = get_mem_group (insn);
32567 if (group)
32568 return group;
32569
32570 if (is_branch (insn))
32571 return disp_branch;
32572
32573 if (is_cmp (insn))
32574 return disp_cmp;
32575
32576 if (has_immediate (insn))
32577 return disp_imm;
32578
32579 if (is_prefetch (insn))
32580 return disp_prefetch;
32581
32582 return disp_no_group;
32583 }
32584
32585 /* Count number of GROUP restricted instructions in a dispatch
32586 window WINDOW_LIST. */
32587
32588 static int
32589 count_num_restricted (rtx insn, dispatch_windows *window_list)
32590 {
32591 enum dispatch_group group = get_insn_group (insn);
32592 int imm_size;
32593 int num_imm_operand;
32594 int num_imm32_operand;
32595 int num_imm64_operand;
32596
32597 if (group == disp_no_group)
32598 return 0;
32599
32600 if (group == disp_imm)
32601 {
32602 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32603 &num_imm64_operand);
32604 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
32605 || num_imm_operand + window_list->num_imm > MAX_IMM
32606 || (num_imm32_operand > 0
32607 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
32608 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
32609 || (num_imm64_operand > 0
32610 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
32611 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
32612 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
32613 && num_imm64_operand > 0
32614 && ((window_list->num_imm_64 > 0
32615 && window_list->num_insn >= 2)
32616 || window_list->num_insn >= 3)))
32617 return BIG;
32618
32619 return 1;
32620 }
32621
32622 if ((group == disp_load_store
32623 && (window_list->num_loads >= MAX_LOAD
32624 || window_list->num_stores >= MAX_STORE))
32625 || ((group == disp_load
32626 || group == disp_prefetch)
32627 && window_list->num_loads >= MAX_LOAD)
32628 || (group == disp_store
32629 && window_list->num_stores >= MAX_STORE))
32630 return BIG;
32631
32632 return 1;
32633 }
32634
32635 /* This function returns true if insn satisfies dispatch rules on the
32636 last window scheduled. */
32637
32638 static bool
32639 fits_dispatch_window (rtx insn)
32640 {
32641 dispatch_windows *window_list = dispatch_window_list;
32642 dispatch_windows *window_list_next = dispatch_window_list->next;
32643 unsigned int num_restrict;
32644 enum dispatch_group group = get_insn_group (insn);
32645 enum insn_path path = get_insn_path (insn);
32646 int sum;
32647
32648 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
32649 instructions should be given the lowest priority in the
32650 scheduling process in the Haifa scheduler to make sure they will be
32651 scheduled in the same dispatch window as the reference to them. */
32652 if (group == disp_jcc || group == disp_cmp)
32653 return false;
32654
32655 /* Check nonrestricted. */
32656 if (group == disp_no_group || group == disp_branch)
32657 return true;
32658
32659 /* Get last dispatch window. */
32660 if (window_list_next)
32661 window_list = window_list_next;
32662
32663 if (window_list->window_num == 1)
32664 {
32665 sum = window_list->prev->window_size + window_list->window_size;
32666
32667 if (sum == 32
32668 || (min_insn_size (insn) + sum) >= 48)
32669 /* Window 1 is full. Go for next window. */
32670 return true;
32671 }
32672
32673 num_restrict = count_num_restricted (insn, window_list);
32674
32675 if (num_restrict > num_allowable_groups[group])
32676 return false;
32677
32678 /* See if it fits in the first window. */
32679 if (window_list->window_num == 0)
32680 {
32681 /* The first window should have only single- and double-path
32682 uops. */
32683 if (path == path_double
32684 && (window_list->num_uops + 2) > MAX_INSN)
32685 return false;
32686 else if (path != path_single)
32687 return false;
32688 }
32689 return true;
32690 }
32691
32692 /* Add an instruction INSN with NUM_UOPS micro-operations to the
32693 dispatch window WINDOW_LIST. */
32694
32695 static void
32696 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
32697 {
32698 int byte_len = min_insn_size (insn);
32699 int num_insn = window_list->num_insn;
32700 int imm_size;
32701 sched_insn_info *window = window_list->window;
32702 enum dispatch_group group = get_insn_group (insn);
32703 enum insn_path path = get_insn_path (insn);
32704 int num_imm_operand;
32705 int num_imm32_operand;
32706 int num_imm64_operand;
32707
32708 if (!window_list->violation && group != disp_cmp
32709 && !fits_dispatch_window (insn))
32710 window_list->violation = true;
32711
32712 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32713 &num_imm64_operand);
32714
32715 /* Initialize window with new instruction. */
32716 window[num_insn].insn = insn;
32717 window[num_insn].byte_len = byte_len;
32718 window[num_insn].group = group;
32719 window[num_insn].path = path;
32720 window[num_insn].imm_bytes = imm_size;
32721
32722 window_list->window_size += byte_len;
32723 window_list->num_insn = num_insn + 1;
32724 window_list->num_uops = window_list->num_uops + num_uops;
32725 window_list->imm_size += imm_size;
32726 window_list->num_imm += num_imm_operand;
32727 window_list->num_imm_32 += num_imm32_operand;
32728 window_list->num_imm_64 += num_imm64_operand;
32729
32730 if (group == disp_store)
32731 window_list->num_stores += 1;
32732 else if (group == disp_load
32733 || group == disp_prefetch)
32734 window_list->num_loads += 1;
32735 else if (group == disp_load_store)
32736 {
32737 window_list->num_stores += 1;
32738 window_list->num_loads += 1;
32739 }
32740 }
32741
32742 /* Adds a scheduled instruction, INSN, to the current dispatch window.
32743 If the total bytes of instructions or the number of instructions in
32744 the window exceeds what is allowed, it allocates a new window. */
32745
32746 static void
32747 add_to_dispatch_window (rtx insn)
32748 {
32749 int byte_len;
32750 dispatch_windows *window_list;
32751 dispatch_windows *next_list;
32752 dispatch_windows *window0_list;
32753 enum insn_path path;
32754 enum dispatch_group insn_group;
32755 bool insn_fits;
32756 int num_insn;
32757 int num_uops;
32758 int window_num;
32759 int insn_num_uops;
32760 int sum;
32761
32762 if (INSN_CODE (insn) < 0)
32763 return;
32764
32765 byte_len = min_insn_size (insn);
32766 window_list = dispatch_window_list;
32767 next_list = window_list->next;
32768 path = get_insn_path (insn);
32769 insn_group = get_insn_group (insn);
32770
32771 /* Get the last dispatch window. */
32772 if (next_list)
32773 window_list = dispatch_window_list->next;
32774
32775 if (path == path_single)
32776 insn_num_uops = 1;
32777 else if (path == path_double)
32778 insn_num_uops = 2;
32779 else
32780 insn_num_uops = (int) path;
32781
32782 /* If the current window is full, get a new window.
32783 Window number zero is full if MAX_INSN uops are scheduled in it.
32784 Window number one is full if window zero's bytes plus window
32785 one's bytes equal 32, or if adding the bytes of the new instruction
32786 to that total makes it greater than or equal to 48, or if it already
32787 has MAX_INSN instructions in it. */
32788 num_insn = window_list->num_insn;
32789 num_uops = window_list->num_uops;
32790 window_num = window_list->window_num;
32791 insn_fits = fits_dispatch_window (insn);
32792
32793 if (num_insn >= MAX_INSN
32794 || num_uops + insn_num_uops > MAX_INSN
32795 || !(insn_fits))
32796 {
32797 window_num = ~window_num & 1;
32798 window_list = allocate_next_window (window_num);
32799 }
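  /* Note: "~window_num & 1" above toggles between 0 and 1, so a full
     window 0 spills into window 1 and a full window 1 wraps back to a
     freshly re-initialized window 0.  */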
32800
32801 if (window_num == 0)
32802 {
32803 add_insn_window (insn, window_list, insn_num_uops);
32804 if (window_list->num_insn >= MAX_INSN
32805 && insn_group == disp_branch)
32806 {
32807 process_end_window ();
32808 return;
32809 }
32810 }
32811 else if (window_num == 1)
32812 {
32813 window0_list = window_list->prev;
32814 sum = window0_list->window_size + window_list->window_size;
32815 if (sum == 32
32816 || (byte_len + sum) >= 48)
32817 {
32818 process_end_window ();
32819 window_list = dispatch_window_list;
32820 }
32821
32822 add_insn_window (insn, window_list, insn_num_uops);
32823 }
32824 else
32825 gcc_unreachable ();
32826
32827 if (is_end_basic_block (insn_group))
32828 {
32829 /* The end of the basic block is reached; do end-of-basic-block processing. */
32830 process_end_window ();
32831 return;
32832 }
32833 }
32834
32835 /* Print the dispatch window, WINDOW_NUM, to FILE. */
32836
32837 DEBUG_FUNCTION static void
32838 debug_dispatch_window_file (FILE *file, int window_num)
32839 {
32840 dispatch_windows *list;
32841 int i;
32842
32843 if (window_num == 0)
32844 list = dispatch_window_list;
32845 else
32846 list = dispatch_window_list1;
32847
32848 fprintf (file, "Window #%d:\n", list->window_num);
32849 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
32850 list->num_insn, list->num_uops, list->window_size);
32851 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
32852 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
32853
32854 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
32855 list->num_stores);
32856 fprintf (file, " insn info:\n");
32857
32858 for (i = 0; i < MAX_INSN; i++)
32859 {
32860 if (!list->window[i].insn)
32861 break;
32862 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
32863 i, group_name[list->window[i].group],
32864 i, (void *)list->window[i].insn,
32865 i, list->window[i].path,
32866 i, list->window[i].byte_len,
32867 i, list->window[i].imm_bytes);
32868 }
32869 }
32870
32871 /* Print to stdout a dispatch window. */
32872
32873 DEBUG_FUNCTION void
32874 debug_dispatch_window (int window_num)
32875 {
32876 debug_dispatch_window_file (stdout, window_num);
32877 }
32878
32879 /* Print INSN dispatch information to FILE. */
32880
32881 DEBUG_FUNCTION static void
32882 debug_insn_dispatch_info_file (FILE *file, rtx insn)
32883 {
32884 int byte_len;
32885 enum insn_path path;
32886 enum dispatch_group group;
32887 int imm_size;
32888 int num_imm_operand;
32889 int num_imm32_operand;
32890 int num_imm64_operand;
32891
32892 if (INSN_CODE (insn) < 0)
32893 return;
32894
32895 byte_len = min_insn_size (insn);
32896 path = get_insn_path (insn);
32897 group = get_insn_group (insn);
32898 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32899 &num_imm64_operand);
32900
32901 fprintf (file, " insn info:\n");
32902 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
32903 group_name[group], path, byte_len);
32904 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
32905 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
32906 }
32907
32908 /* Print to STDERR the status of the ready list with respect to
32909 dispatch windows. */
32910
32911 DEBUG_FUNCTION void
32912 debug_ready_dispatch (void)
32913 {
32914 int i;
32915 int no_ready = number_in_ready ();
32916
32917 fprintf (stdout, "Number of ready: %d\n", no_ready);
32918
32919 for (i = 0; i < no_ready; i++)
32920 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
32921 }
32922
32923 /* This routine is the driver of the dispatch scheduler. */
32924
32925 static void
32926 do_dispatch (rtx insn, int mode)
32927 {
32928 if (mode == DISPATCH_INIT)
32929 init_dispatch_sched ();
32930 else if (mode == ADD_TO_DISPATCH_WINDOW)
32931 add_to_dispatch_window (insn);
32932 }
32933
32934 /* Return TRUE if Dispatch Scheduling is supported. */
32935
32936 static bool
32937 has_dispatch (rtx insn, int action)
32938 {
32939 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
32940 switch (action)
32941 {
32942 default:
32943 return false;
32944
32945 case IS_DISPATCH_ON:
32946 return true;
32947 break;
32948
32949 case IS_CMP:
32950 return is_cmp (insn);
32951
32952 case DISPATCH_VIOLATION:
32953 return dispatch_violation ();
32954
32955 case FITS_DISPATCH_WINDOW:
32956 return fits_dispatch_window (insn);
32957 }
32958
32959 return false;
32960 }
32961
32962 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
32963 place emms and femms instructions. */
32964
32965 static enum machine_mode
32966 ix86_preferred_simd_mode (enum machine_mode mode)
32967 {
32968 /* Disable double precision vectorizer if needed. */
32969 if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
32970 return word_mode;
32971
32972 if (!TARGET_AVX && !TARGET_SSE)
32973 return word_mode;
32974
32975 switch (mode)
32976 {
32977 case SFmode:
32978 return TARGET_AVX ? V8SFmode : V4SFmode;
32979 case DFmode:
32980 return TARGET_AVX ? V4DFmode : V2DFmode;
32981 case DImode:
32982 return V2DImode;
32983 case SImode:
32984 return V4SImode;
32985 case HImode:
32986 return V8HImode;
32987 case QImode:
32988 return V16QImode;
32989
32990 default:;
32991 }
32992
32993 return word_mode;
32994 }
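/* Returning word_mode above is the "no vector preference" answer; it is
   what keeps the vectorizer from choosing an SSE/AVX vector mode when
   double-precision vectorization is disabled or neither AVX nor SSE is
   available.  */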
32995
32996 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
32997 vectors. */
32998
32999 static unsigned int
33000 ix86_autovectorize_vector_sizes (void)
33001 {
33002 return TARGET_AVX ? 32 | 16 : 0;
33003 }
33004
33005 /* Initialize the GCC target structure. */
33006 #undef TARGET_RETURN_IN_MEMORY
33007 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
33008
33009 #undef TARGET_LEGITIMIZE_ADDRESS
33010 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
33011
33012 #undef TARGET_ATTRIBUTE_TABLE
33013 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
33014 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
33015 # undef TARGET_MERGE_DECL_ATTRIBUTES
33016 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
33017 #endif
33018
33019 #undef TARGET_COMP_TYPE_ATTRIBUTES
33020 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
33021
33022 #undef TARGET_INIT_BUILTINS
33023 #define TARGET_INIT_BUILTINS ix86_init_builtins
33024 #undef TARGET_BUILTIN_DECL
33025 #define TARGET_BUILTIN_DECL ix86_builtin_decl
33026 #undef TARGET_EXPAND_BUILTIN
33027 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
33028
33029 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
33030 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
33031 ix86_builtin_vectorized_function
33032
33033 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
33034 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
33035
33036 #undef TARGET_BUILTIN_RECIPROCAL
33037 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
33038
33039 #undef TARGET_ASM_FUNCTION_EPILOGUE
33040 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
33041
33042 #undef TARGET_ENCODE_SECTION_INFO
33043 #ifndef SUBTARGET_ENCODE_SECTION_INFO
33044 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
33045 #else
33046 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
33047 #endif
33048
33049 #undef TARGET_ASM_OPEN_PAREN
33050 #define TARGET_ASM_OPEN_PAREN ""
33051 #undef TARGET_ASM_CLOSE_PAREN
33052 #define TARGET_ASM_CLOSE_PAREN ""
33053
33054 #undef TARGET_ASM_BYTE_OP
33055 #define TARGET_ASM_BYTE_OP ASM_BYTE
33056
33057 #undef TARGET_ASM_ALIGNED_HI_OP
33058 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
33059 #undef TARGET_ASM_ALIGNED_SI_OP
33060 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
33061 #ifdef ASM_QUAD
33062 #undef TARGET_ASM_ALIGNED_DI_OP
33063 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
33064 #endif
33065
33066 #undef TARGET_PROFILE_BEFORE_PROLOGUE
33067 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
33068
33069 #undef TARGET_ASM_UNALIGNED_HI_OP
33070 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
33071 #undef TARGET_ASM_UNALIGNED_SI_OP
33072 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
33073 #undef TARGET_ASM_UNALIGNED_DI_OP
33074 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
33075
33076 #undef TARGET_PRINT_OPERAND
33077 #define TARGET_PRINT_OPERAND ix86_print_operand
33078 #undef TARGET_PRINT_OPERAND_ADDRESS
33079 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
33080 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
33081 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
33082 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
33083 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
33084
33085 #undef TARGET_SCHED_ADJUST_COST
33086 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
33087 #undef TARGET_SCHED_ISSUE_RATE
33088 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
33089 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
33090 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
33091 ia32_multipass_dfa_lookahead
33092
33093 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
33094 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
33095
33096 #ifdef HAVE_AS_TLS
33097 #undef TARGET_HAVE_TLS
33098 #define TARGET_HAVE_TLS true
33099 #endif
33100 #undef TARGET_CANNOT_FORCE_CONST_MEM
33101 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
33102 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
33103 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
33104
33105 #undef TARGET_DELEGITIMIZE_ADDRESS
33106 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
33107
33108 #undef TARGET_MS_BITFIELD_LAYOUT_P
33109 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
33110
33111 #if TARGET_MACHO
33112 #undef TARGET_BINDS_LOCAL_P
33113 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
33114 #endif
33115 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
33116 #undef TARGET_BINDS_LOCAL_P
33117 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
33118 #endif
33119
33120 #undef TARGET_ASM_OUTPUT_MI_THUNK
33121 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
33122 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
33123 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
33124
33125 #undef TARGET_ASM_FILE_START
33126 #define TARGET_ASM_FILE_START x86_file_start
33127
33128 #undef TARGET_DEFAULT_TARGET_FLAGS
33129 #define TARGET_DEFAULT_TARGET_FLAGS \
33130 (TARGET_DEFAULT \
33131 | TARGET_SUBTARGET_DEFAULT \
33132 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
33133 | MASK_FUSED_MADD)
33134
33135 #undef TARGET_HANDLE_OPTION
33136 #define TARGET_HANDLE_OPTION ix86_handle_option
33137
33138 #undef TARGET_OPTION_OVERRIDE
33139 #define TARGET_OPTION_OVERRIDE ix86_option_override
33140 #undef TARGET_OPTION_OPTIMIZATION
33141 #define TARGET_OPTION_OPTIMIZATION ix86_option_optimization
33142
33143 #undef TARGET_REGISTER_MOVE_COST
33144 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
33145 #undef TARGET_MEMORY_MOVE_COST
33146 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
33147 #undef TARGET_RTX_COSTS
33148 #define TARGET_RTX_COSTS ix86_rtx_costs
33149 #undef TARGET_ADDRESS_COST
33150 #define TARGET_ADDRESS_COST ix86_address_cost
33151
33152 #undef TARGET_FIXED_CONDITION_CODE_REGS
33153 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
33154 #undef TARGET_CC_MODES_COMPATIBLE
33155 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
33156
33157 #undef TARGET_MACHINE_DEPENDENT_REORG
33158 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
33159
33160 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
33161 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
33162
33163 #undef TARGET_BUILD_BUILTIN_VA_LIST
33164 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
33165
33166 #undef TARGET_ENUM_VA_LIST_P
33167 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
33168
33169 #undef TARGET_FN_ABI_VA_LIST
33170 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
33171
33172 #undef TARGET_CANONICAL_VA_LIST_TYPE
33173 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
33174
33175 #undef TARGET_EXPAND_BUILTIN_VA_START
33176 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
33177
33178 #undef TARGET_MD_ASM_CLOBBERS
33179 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
33180
33181 #undef TARGET_PROMOTE_PROTOTYPES
33182 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
33183 #undef TARGET_STRUCT_VALUE_RTX
33184 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
33185 #undef TARGET_SETUP_INCOMING_VARARGS
33186 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
33187 #undef TARGET_MUST_PASS_IN_STACK
33188 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
33189 #undef TARGET_FUNCTION_ARG_ADVANCE
33190 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
33191 #undef TARGET_FUNCTION_ARG
33192 #define TARGET_FUNCTION_ARG ix86_function_arg
33193 #undef TARGET_PASS_BY_REFERENCE
33194 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
33195 #undef TARGET_INTERNAL_ARG_POINTER
33196 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
33197 #undef TARGET_UPDATE_STACK_BOUNDARY
33198 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
33199 #undef TARGET_GET_DRAP_RTX
33200 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
33201 #undef TARGET_STRICT_ARGUMENT_NAMING
33202 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
33203 #undef TARGET_STATIC_CHAIN
33204 #define TARGET_STATIC_CHAIN ix86_static_chain
33205 #undef TARGET_TRAMPOLINE_INIT
33206 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
33207 #undef TARGET_RETURN_POPS_ARGS
33208 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
33209
33210 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
33211 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
33212
33213 #undef TARGET_SCALAR_MODE_SUPPORTED_P
33214 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
33215
33216 #undef TARGET_VECTOR_MODE_SUPPORTED_P
33217 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
33218
33219 #undef TARGET_C_MODE_FOR_SUFFIX
33220 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
33221
33222 #ifdef HAVE_AS_TLS
33223 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
33224 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
33225 #endif
33226
33227 #ifdef SUBTARGET_INSERT_ATTRIBUTES
33228 #undef TARGET_INSERT_ATTRIBUTES
33229 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
33230 #endif
33231
33232 #undef TARGET_MANGLE_TYPE
33233 #define TARGET_MANGLE_TYPE ix86_mangle_type
33234
33235 #undef TARGET_STACK_PROTECT_FAIL
33236 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
33237
33238 #undef TARGET_SUPPORTS_SPLIT_STACK
33239 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
33240
33241 #undef TARGET_FUNCTION_VALUE
33242 #define TARGET_FUNCTION_VALUE ix86_function_value
33243
33244 #undef TARGET_FUNCTION_VALUE_REGNO_P
33245 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
33246
33247 #undef TARGET_SECONDARY_RELOAD
33248 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
33249
33250 #undef TARGET_CLASS_LIKELY_SPILLED_P
33251 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
33252
33253 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
33254 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
33255 ix86_builtin_vectorization_cost
33256 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
33257 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
33258 ix86_vectorize_builtin_vec_perm
33259 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
33260 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
33261 ix86_vectorize_builtin_vec_perm_ok
33262 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
33263 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
33264 ix86_preferred_simd_mode
33265 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
33266 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
33267 ix86_autovectorize_vector_sizes
33268
33269 #undef TARGET_SET_CURRENT_FUNCTION
33270 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
33271
33272 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
33273 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
33274
33275 #undef TARGET_OPTION_SAVE
33276 #define TARGET_OPTION_SAVE ix86_function_specific_save
33277
33278 #undef TARGET_OPTION_RESTORE
33279 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
33280
33281 #undef TARGET_OPTION_PRINT
33282 #define TARGET_OPTION_PRINT ix86_function_specific_print
33283
33284 #undef TARGET_CAN_INLINE_P
33285 #define TARGET_CAN_INLINE_P ix86_can_inline_p
33286
33287 #undef TARGET_EXPAND_TO_RTL_HOOK
33288 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
33289
33290 #undef TARGET_LEGITIMATE_ADDRESS_P
33291 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
33292
33293 #undef TARGET_IRA_COVER_CLASSES
33294 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
33295
33296 #undef TARGET_FRAME_POINTER_REQUIRED
33297 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
33298
33299 #undef TARGET_CAN_ELIMINATE
33300 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
33301
33302 #undef TARGET_EXTRA_LIVE_ON_ENTRY
33303 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
33304
33305 #undef TARGET_ASM_CODE_END
33306 #define TARGET_ASM_CODE_END ix86_code_end
33307
33308 struct gcc_target targetm = TARGET_INITIALIZER;
33309 \f
33310 #include "gt-i386.h"